From dd3ce02644b1c2abc290cd988e52a831922316e5 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 19 Aug 2019 12:53:23 +0300 Subject: [PATCH 001/222] Typo fix. --- docs/en/query_language/functions/functions_for_nulls.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/query_language/functions/functions_for_nulls.md b/docs/en/query_language/functions/functions_for_nulls.md index 4b7257fd4b3..41fec479d0d 100644 --- a/docs/en/query_language/functions/functions_for_nulls.md +++ b/docs/en/query_language/functions/functions_for_nulls.md @@ -241,7 +241,7 @@ SHOW CREATE TABLE t_null └───┴──────┘ ``` -Apply the `resumenotnull` function to the `y` column. +Apply the `assumeNotNull` function to the `y` column. ``` SELECT assumeNotNull(y) FROM t_null From 4cdb4d5ff229ccfb56653c8770a13e1024e186c8 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 10 Sep 2019 13:09:36 +0300 Subject: [PATCH 002/222] Links fix. --- docs/en/operations/system_tables.md | 6 +++--- docs/en/query_language/system.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 4e3386764fd..0b6481de3c1 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -64,9 +64,9 @@ Please note that `errors_count` is updated once per query to the cluster, but `e ** See also ** -- [Table engine Distributed](../../operations/table_engines/distributed.md) -- [distributed_replica_error_cap setting](../settings/settings.md#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life setting](../settings/settings.md#settings-distributed_replica_error_half_life) +- [Table engine Distributed](table_engines/distributed.md) +- [distributed_replica_error_cap setting](settings/settings.md#settings-distributed_replica_error_cap) +- [distributed_replica_error_half_life setting](settings/settings.md#settings-distributed_replica_error_half_life) ## system.columns diff --git a/docs/en/query_language/system.md b/docs/en/query_language/system.md index 648aa07f5e7..3ef504e46b3 100644 --- a/docs/en/query_language/system.md +++ b/docs/en/query_language/system.md @@ -15,7 +15,7 @@ ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Reloads all dictionaries that have been successfully loaded before. -By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../operations/server_settings/settings.md#dictionaries-lazy-load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). +By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../operations/server_settings/settings.md#server_settings-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). Always returns `Ok.` regardless of the result of the dictionary update. ## RELOAD DICTIONARY dictionary_name {#query_language-system-reload-dictionary} From 92b3183bceee9cd70a783bccbf4dfff5dd95d066 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 10 Sep 2019 14:27:20 +0300 Subject: [PATCH 003/222] Fixed links in docs. 
--- docs/ru/interfaces/formats.md | 2 +- docs/toc_ru.yml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index fc28d97ecb9..5b26d23d80a 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -936,7 +936,7 @@ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parq clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../../operations/table_engines/hdfs.md) и `URL`. +Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../operations/table_engines/hdfs.md) и `URL`. ## Схема формата {#formatschema} diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 2b7a7f156ab..b21bcc838dc 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -88,6 +88,7 @@ nav: - 'MySQL': 'operations/table_engines/mysql.md' - 'JDBC': 'operations/table_engines/jdbc.md' - 'ODBC': 'operations/table_engines/odbc.md' + - 'HDFS': 'operations/table_engines/hdfs.md' - 'Особые': - 'Distributed': 'operations/table_engines/distributed.md' - 'Внешние данные': 'operations/table_engines/external_data.md' @@ -159,6 +160,7 @@ nav: - 'mysql': 'query_language/table_functions/mysql.md' - 'jdbc': 'query_language/table_functions/jdbc.md' - 'odbc': 'query_language/table_functions/odbc.md' + - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - 'Словари': - 'Введение': 'query_language/dicts/index.md' From 16e3428891c02f0a61c25b4f7a5aacf1b9417e52 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 10 Sep 2019 14:30:32 +0300 Subject: [PATCH 004/222] More fixes. --- docs/ru/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 5b26d23d80a..9acf2d67e4a 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -936,7 +936,7 @@ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parq clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../operations/table_engines/hdfs.md) и `URL`. +Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../operations/table_engines/hdfs.md) и `URL`. ## Схема формата {#formatschema} From 4576e1f4b2a11b166e074542d6425d8265dd7ba7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 13 Sep 2019 11:59:46 +0300 Subject: [PATCH 005/222] Enable Processors by default. --- dbms/src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 0678aaeedc6..5f23c0a4be8 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -338,7 +338,7 @@ struct Settings : public SettingsCollection M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. 
Currently supported only for 'mysql' table function.") \
     M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
     \
-    M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \
+    M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \
     \
     M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
     M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \

From 1335aa75d795f0dc04af7a55d1f83ce1f27ee5c8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 13 Sep 2019 15:34:05 +0300
Subject: [PATCH 006/222] Added TreeExecutor.

---
 .../src/Processors/Executors/TreeExecutor.cpp | 157 ++++++++++++++++++
 dbms/src/Processors/Executors/TreeExecutor.h  |  28 ++++
 2 files changed, 185 insertions(+)
 create mode 100644 dbms/src/Processors/Executors/TreeExecutor.cpp
 create mode 100644 dbms/src/Processors/Executors/TreeExecutor.h

diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp
new file mode 100644
index 00000000000..d7fc1b78ede
--- /dev/null
+++ b/dbms/src/Processors/Executors/TreeExecutor.cpp
@@ -0,0 +1,157 @@
+#include <Processors/Executors/TreeExecutor.h>
+#include <stack>

namespace DB
{

static void checkProcessorHasSingleOutput(IProcessor * processor)
{
    size_t num_outputs = processor->getOutputs().size();
    if (num_outputs != 1)
        throw Exception("All processors in TreeExecutor must have a single output, "
                        "but processor with name " + processor->getName() + " has " + std::to_string(num_outputs),
                        ErrorCodes::LOGICAL_ERROR);
}

static void validateTree(const Processors & processors, IProcessor * root)
{
    std::unordered_map<IProcessor *, size_t> index;

    for (auto & processor : processors)
    {
        bool is_inserted = index.try_emplace(processor.get(), index.size()).second;

        if (!is_inserted)
            throw Exception("Duplicate processor in TreeExecutor with name " + processor->getName(),
                            ErrorCodes::LOGICAL_ERROR);
    }

    std::vector<bool> is_visited(processors.size(), false);
    std::stack<IProcessor *> stack;

    stack.push(root);

    while (!stack.empty())
    {
        IProcessor * node = stack.top();
        stack.pop();

        auto it = index.find(node);

        if (it == index.end())
            throw Exception("Processor with name " + node->getName() + " "
                            "was not mentioned in list passed to TreeExecutor, "
                            "but was traversed to from other processors.", ErrorCodes::LOGICAL_ERROR);

        size_t position = it->second;

        if (is_visited[position])
            throw Exception("Processor with name " + node->getName() + " was visited twice while traversing in TreeExecutor. "
                            "Passed processors are not tree.", ErrorCodes::LOGICAL_ERROR);

        is_visited[position] = true;

        checkProcessorHasSingleOutput(node);

        auto & children = node->getInputs();
        for (auto & child : children)
            stack.push(&child.getOutputPort().getProcessor());
    }

    for (size_t i = 0; i < is_visited.size(); ++i)
        if (!is_visited[i])
            throw Exception("Processor with name " + processors[i]->getName() +
                            " was not visited by traversal in TreeExecutor.", ErrorCodes::LOGICAL_ERROR);
}

void TreeExecutor::init()
{
    if (processors.empty())
        throw Exception("No processors were passed to TreeExecutor.", ErrorCodes::LOGICAL_ERROR);

    root = processors.back().get();

    validateTree(processors, root);

    port = std::make_unique<InputPort>(getHeader(), root);
    connect(root->getOutputs().front(), *port);
    port->setNeeded();
}

void TreeExecutor::execute()
{
    std::stack<IProcessor *> stack;
    stack.push(root);

    while (!stack.empty())
    {
        IProcessor * node = stack.top();

        auto status = node->prepare();

        switch (status)
        {
            case IProcessor::Status::NeedData:
            {
                auto & inputs = node->getInputs();

                if (inputs.empty())
                    throw Exception("Processor " + node->getName() + " with empty input "
                                    "has returned NeedData in TreeExecutor", ErrorCodes::LOGICAL_ERROR);

                bool all_finished = true;

                for (auto & input : inputs)
                {
                    if (input.isFinished())
                        continue;

                    all_finished = false;

                    stack.push(&input.getOutputPort().getProcessor());
                }

                if (all_finished)
                    throw Exception("Processor " + node->getName() + " has returned NeedData in TreeExecutor, "
                                    "but all its inputs are finished.", ErrorCodes::LOGICAL_ERROR);
                break;
            }
            case IProcessor::Status::PortFull:
            {
                stack.pop();
                break;
            }
            case IProcessor::Status::Finished:
            {
                stack.pop();
                break;
            }
            case IProcessor::Status::Ready:
            {
                node->work();
                break;
            }
            case IProcessor::Status::Async:
            case IProcessor::Status::Wait:
            case IProcessor::Status::ExpandPipeline:
            {
                throw Exception("Processor with name " + node->getName() + " "
                                "returned status " + IProcessor::statusToName(status) + " "
                                "which is not supported in TreeExecutor.", ErrorCodes::LOGICAL_ERROR);
            }
        }
    }
}

Block TreeExecutor::readImpl()
{
    while (true)
    {
        if (port->isFinished())
            return {};

        if (port->hasData())
            return getHeader().cloneWithColumns(port->pull().detachColumns());

        execute();
    }
}

}
diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h
new file mode 100644
index 00000000000..0aad5b3024a
--- /dev/null
+++ b/dbms/src/Processors/Executors/TreeExecutor.h
@@ -0,0 +1,28 @@
+#pragma once
+#include <Processors/IProcessor.h>
+#include <DataStreams/IBlockInputStream.h>

namespace DB
{

class TreeExecutor : public IBlockInputStream
{
public:
    explicit TreeExecutor(Processors processors_) : processors(std::move(processors_)) { init(); }

    String getName() const override { return root->getName(); }
    Block getHeader() const override { return root->getOutputs().front().getHeader(); }

protected:
    Block readImpl() override;

private:
    Processors processors;
    IProcessor * root = nullptr;
    std::unique_ptr<InputPort> port;

    void init();
    void execute();
};

}

From 1f5e62d741bd43c8bc24d838d34cf1a3e85efc71 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 13 Sep 2019 15:59:48 +0300
Subject: [PATCH 007/222] Added IStorage::readWithProcessors.
---
 dbms/src/Storages/IStorage.cpp | 21 +++++++++++++++++++++
 dbms/src/Storages/IStorage.h   | 13 +++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp
index cbd14666006..cd4b4f2e362 100644
--- a/dbms/src/Storages/IStorage.cpp
+++ b/dbms/src/Storages/IStorage.cpp
@@ -4,6 +4,8 @@
 #include 
 #include 
 
+#include <Processors/Executors/TreeExecutor.h>
+
 #include 
 #include 
 
@@ -423,4 +425,23 @@ void IStorage::alter(
     }
 }
 
+BlockInputStreams IStorage::read(
+    const Names & column_names,
+    const SelectQueryInfo & query_info,
+    const Context & context,
+    QueryProcessingStage::Enum processed_stage,
+    size_t max_block_size,
+    unsigned num_streams)
+{
+    auto pipes = readWithProcessors(column_names, query_info, context, processed_stage, max_block_size, num_streams);
+
+    BlockInputStreams res;
+    res.reserve(pipes.size());
+
+    for (auto & pipe : pipes)
+        res.emplace_back(std::make_shared<TreeExecutor>(std::move(pipe)));
+
+    return res;
+}
+
 }
diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h
index d92b06029d8..11fcaad1a03 100644
--- a/dbms/src/Storages/IStorage.h
+++ b/dbms/src/Storages/IStorage.h
@@ -41,6 +41,11 @@ class AlterCommands;
 class MutationCommands;
 class PartitionCommands;
 
+class IProcessor;
+using ProcessorPtr = std::shared_ptr<IProcessor>;
+using Processors = std::vector<ProcessorPtr>;
+using Pipes = std::vector<Processors>;
+
 struct ColumnSize
 {
     size_t marks = 0;
@@ -234,6 +239,14 @@ public:
      * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
      */
     virtual BlockInputStreams read(
+        const Names & /*column_names*/,
+        const SelectQueryInfo & /*query_info*/,
+        const Context & /*context*/,
+        QueryProcessingStage::Enum /*processed_stage*/,
+        size_t /*max_block_size*/,
+        unsigned /*num_streams*/);
+
+    virtual Pipes readWithProcessors(
         const Names & /*column_names*/,
         const SelectQueryInfo & /*query_info*/,
         const Context & /*context*/,

From 3c53dfd227be24a4f3189e15ee94db7d5b047e57 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 13 Sep 2019 18:41:09 +0300
Subject: [PATCH 008/222] Add processors to StorageMergeTree [WIP].
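
The merge-tree source now derives from ISource and produces Chunk instead of
Block: a Chunk is just a set of columns plus an explicit row count, with no
names or types attached, so the header is computed once in the constructor
(the getHeader(...) call passed to ISource) and travels separately. A small
self-contained illustration of the Chunk API this patch relies on (include
paths assumed from this source tree):

    #include <Processors/Chunk.h>
    #include <Columns/ColumnsNumber.h>

    using namespace DB;

    Chunk makeExampleChunk()
    {
        /// One UInt64 column with three rows.
        auto column = ColumnUInt64::create();
        for (UInt64 i = 0; i < 3; ++i)
            column->insertValue(i);

        Columns columns;
        columns.emplace_back(std::move(column));
        return Chunk(std::move(columns), 3);
    }

Converting back to a Block requires the header, which is exactly what
TreeExecutor::readImpl() does with
getHeader().cloneWithColumns(port->pull().detachColumns()).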
--- .../MergeTreeBaseSelectBlockInputStream.cpp | 107 ++++++++++++++---- .../MergeTreeBaseSelectBlockInputStream.h | 22 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 14 +-- .../MergeTree/MergeTreeDataSelectExecutor.h | 10 +- .../Storages/MergeTree/MergeTreeRangeReader.h | 2 +- .../MergeTreeThreadSelectBlockInputStream.cpp | 14 +-- .../MergeTreeThreadSelectBlockInputStream.h | 4 +- dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageMergeTree.h | 2 +- 9 files changed, 112 insertions(+), 65 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp index 0489182fe55..077e3ea0712 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp @@ -19,7 +19,8 @@ namespace ErrorCodes } -MergeTreeBaseSelectBlockInputStream::MergeTreeBaseSelectBlockInputStream( +MergeTreeBaseSelectBlockInputProcessor::MergeTreeBaseSelectBlockInputProcessor( + Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, UInt64 max_block_size_rows_, @@ -31,6 +32,7 @@ MergeTreeBaseSelectBlockInputStream::MergeTreeBaseSelectBlockInputStream( bool save_marks_in_cache_, const Names & virt_column_names_) : + ISource(getHeader(std::move(header), prewhere_info_, virt_column_names_)), storage(storage_), prewhere_info(prewhere_info_), max_block_size_rows(max_block_size_rows_), @@ -45,26 +47,27 @@ MergeTreeBaseSelectBlockInputStream::MergeTreeBaseSelectBlockInputStream( } -Block MergeTreeBaseSelectBlockInputStream::readImpl() +Chunk MergeTreeBaseSelectBlockInputProcessor::generate() { - Block res; - - while (!res && !isCancelled()) + while (!isCancelled()) { if ((!task || task->isFinished()) && !getNewTask()) - break; + return {}; - res = readFromPart(); + auto res = readFromPart(); - if (res) - injectVirtualColumns(res); + if (!res.hasNoRows()) + { + injectVirtualColumns(res, task.get(), virt_column_names); + return res; + } } - return res; + return {}; } -void MergeTreeBaseSelectBlockInputStream::initializeRangeReaders(MergeTreeReadTask & current_task) +void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeReadTask & current_task) { if (prewhere_info) { @@ -103,7 +106,7 @@ void MergeTreeBaseSelectBlockInputStream::initializeRangeReaders(MergeTreeReadTa } -Block MergeTreeBaseSelectBlockInputStream::readFromPartImpl() +Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() { if (task->size_predictor) task->size_predictor->startBlock(); @@ -160,7 +163,8 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPartImpl() UInt64 num_filtered_rows = read_result.numReadRows() - read_result.block.rows(); - progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); + /// TODO + /// progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); if (task->size_predictor) { @@ -177,13 +181,14 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPartImpl() column.column = column.column->convertToFullColumnIfConst(); } - read_result.block.checkNumberOfRows(); + UInt64 num_rows = read_result.columns.empty() ? 
0 + : read_result.columns[0]->size(); - return read_result.block; + return Chunk(std::move(read_result.columns), num_rows); } -Block MergeTreeBaseSelectBlockInputStream::readFromPart() +Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPart() { if (!task->range_reader.isInitialized()) initializeRangeReaders(*task); @@ -192,15 +197,18 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPart() } -void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) const +template +static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, MergeTreeReadTask * task, const Names & virtual_columns) { /// add virtual columns /// Except _sample_factor, which is added from the outside. - if (!virt_column_names.empty()) + if (!virtual_columns.empty()) { - const auto rows = block.rows(); + if (unlikely(rows && !task)) + throw Exception("Cannot insert virtual columns to non-empty chunk without specified task.", + ErrorCodes::LOGICAL_ERROR); - for (const auto & virt_column_name : virt_column_names) + for (const auto & virt_column_name : virtual_columns) { if (virt_column_name == "_part") { @@ -210,7 +218,7 @@ void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) co else column = DataTypeString().createColumn(); - block.insert({ column, std::make_shared(), virt_column_name}); + callback.template insert(column, virt_column_name); } else if (virt_column_name == "_part_index") { @@ -220,7 +228,7 @@ void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) co else column = DataTypeUInt64().createColumn(); - block.insert({ column, std::make_shared(), virt_column_name}); + callback.template insert(column, virt_column_name); } else if (virt_column_name == "_partition_id") { @@ -230,14 +238,55 @@ void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) co else column = DataTypeString().createColumn(); - block.insert({ column, std::make_shared(), virt_column_name}); + callback.template insert(column, virt_column_name); } } } } +namespace +{ + struct InsertIntoBlockCallback + { + template + void insert(const ColumnPtr & column, const String & name) + { + block.insert({column, std::make_shared(), name}); + } -void MergeTreeBaseSelectBlockInputStream::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) + Block & block; + }; + + struct InsertIntoColumnsCallback + { + template + void insert(const ColumnPtr & column, const String &) + { + columns.push_back(column); + } + + Columns & columns; + }; +} + +void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns) +{ + InsertIntoBlockCallback callback { block }; + injectVirtualColumnsImpl(block.rows(), callback, task, virtual_columns); +} + +void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns) +{ + UInt64 num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + InsertIntoColumnsCallback callback { columns }; + injectVirtualColumnsImpl(num_rows, callback, task, virtual_columns); + + chunk.setColumns(columns, num_rows); +} + +void MergeTreeBaseSelectBlockInputProcessor::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) { if (prewhere_info) { @@ -253,7 +302,15 @@ void MergeTreeBaseSelectBlockInputStream::executePrewhereActions(Block & block, } } +Block MergeTreeBaseSelectBlockInputProcessor::getHeader( + Block block, const PrewhereInfoPtr & 
prewhere_info, const Names & virtual_columns) +{ + executePrewhereActions(block, prewhere_info); + injectVirtualColumns(block, nullptr, virtual_columns); + return block; +} -MergeTreeBaseSelectBlockInputStream::~MergeTreeBaseSelectBlockInputStream() = default; + +MergeTreeBaseSelectBlockInputProcessor::~MergeTreeBaseSelectBlockInputProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h index 640f73652e4..0abbb2d001c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h @@ -5,6 +5,8 @@ #include #include +#include + namespace DB { @@ -14,10 +16,11 @@ class MarkCache; /// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream -class MergeTreeBaseSelectBlockInputStream : public IBlockInputStream +class MergeTreeBaseSelectBlockInputProcessor : public ISource { public: - MergeTreeBaseSelectBlockInputStream( + MergeTreeBaseSelectBlockInputProcessor( + Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, UInt64 max_block_size_rows_, @@ -29,24 +32,23 @@ public: bool save_marks_in_cache_ = true, const Names & virt_column_names_ = {}); - ~MergeTreeBaseSelectBlockInputStream() override; + ~MergeTreeBaseSelectBlockInputProcessor() override; static void executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info); protected: - Block readImpl() final; + Chunk generate() final; /// Creates new this->task, and initilizes readers virtual bool getNewTask() = 0; - /// We will call progressImpl manually. - void progress(const Progress &) override {} + virtual Chunk readFromPart(); - virtual Block readFromPart(); + Chunk readFromPartImpl(); - Block readFromPartImpl(); - - void injectVirtualColumns(Block & block) const; + static void injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns); + static void injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns); + static Block getHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns); void initializeRangeReaders(MergeTreeReadTask & task); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 40dc0bf6b52..95f76a4c7f7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -141,7 +141,7 @@ static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, siz } -BlockInputStreams MergeTreeDataSelectExecutor::read( +Pipes MergeTreeDataSelectExecutor::read( const Names & column_names_to_return, const SelectQueryInfo & query_info, const Context & context, @@ -154,7 +154,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( max_block_size, num_streams, max_block_numbers_to_read); } -BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( +Pipes MergeTreeDataSelectExecutor::readFromParts( MergeTreeData::DataPartsVector parts, const Names & column_names_to_return, const SelectQueryInfo & query_info, @@ -565,7 +565,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); - BlockInputStreams res; + Pipes res; if (select.final()) { @@ -658,7 
+658,7 @@ size_t roundRowsOrBytesToMarks( } -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( +Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -707,7 +707,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( if (sum_marks > max_marks_to_use_cache) use_uncompressed_cache = false; - BlockInputStreams res; + Pipes res; if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1) { @@ -817,7 +817,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( return res; } -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( +Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -1026,7 +1026,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO } -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( +Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, const Names & column_names, UInt64 max_block_size, diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 44857799d01..9b46b663ab2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -24,7 +24,7 @@ public: */ using PartitionIdToMaxBlock = std::unordered_map; - BlockInputStreams read( + Pipes read( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, @@ -32,7 +32,7 @@ public: unsigned num_streams, const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const; - BlockInputStreams readFromParts( + Pipes readFromParts( MergeTreeData::DataPartsVector parts, const Names & column_names, const SelectQueryInfo & query_info, @@ -46,7 +46,7 @@ private: Logger * log; - BlockInputStreams spreadMarkRangesAmongStreams( + Pipes spreadMarkRangesAmongStreams( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -56,7 +56,7 @@ private: const Names & virt_columns, const Settings & settings) const; - BlockInputStreams spreadMarkRangesAmongStreamsWithOrder( + Pipes spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -67,7 +67,7 @@ private: const Names & virt_columns, const Settings & settings) const; - BlockInputStreams spreadMarkRangesAmongStreamsFinal( + Pipes spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, const Names & column_names, UInt64 max_block_size, diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index 0eae69ee17e..4261509d7fc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -157,7 +157,7 @@ public: void addNumBytesRead(size_t count) { num_bytes_read += count; } - Block block; + Columns columns; private: RangesInfo started_ranges; diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp index 69cf173212d..cd6efa6b7d1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp @@ -20,7 +20,7 
@@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( const Settings & settings, const Names & virt_column_names_) : - MergeTreeBaseSelectBlockInputStream{storage_, prewhere_info_, max_block_size_rows_, + MergeTreeBaseSelectBlockInputProcessor{pool->getHeader(), storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, thread{thread_}, @@ -38,19 +38,9 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( else min_marks_to_read = min_marks_to_read_; - ordered_names = getHeader().getNames(); + ordered_names = getPort().getHeader().getNames(); } - -Block MergeTreeThreadSelectBlockInputStream::getHeader() const -{ - auto res = pool->getHeader(); - executePrewhereActions(res, prewhere_info); - injectVirtualColumns(res); - return res; -} - - /// Requests read task from MergeTreeReadPool and signals whether it got one bool MergeTreeThreadSelectBlockInputStream::getNewTask() { diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h index 3c7dfb7927d..9603d21fb33 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h @@ -11,7 +11,7 @@ class MergeTreeReadPool; /** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked * to perform. */ -class MergeTreeThreadSelectBlockInputStream : public MergeTreeBaseSelectBlockInputStream +class MergeTreeThreadSelectBlockInputStream : public MergeTreeBaseSelectBlockInputProcessor { public: MergeTreeThreadSelectBlockInputStream( @@ -31,8 +31,6 @@ public: ~MergeTreeThreadSelectBlockInputStream() override; - Block getHeader() const override; - protected: /// Requests read task from MergeTreeReadPool and signals whether it got one bool getNewTask() override; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 77c5a909f0c..bc50dec5b72 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -121,7 +121,7 @@ StorageMergeTree::~StorageMergeTree() shutdown(); } -BlockInputStreams StorageMergeTree::read( +Pipes StorageMergeTree::readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 04b20fda5b9..6d55b4655ce 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -35,7 +35,7 @@ public: bool supportsIndexForIn() const override { return true; } - BlockInputStreams read( + Pipes readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, From 5108ebeece9db5b7146690e0047c3f9d8c7f6a4f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 23 Sep 2019 22:22:02 +0300 Subject: [PATCH 009/222] Remove Block from RangeReader. 
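
RangeReader and MergeTreeReader now pass around a bare Columns vector that is
positionally aligned with the reader's column list instead of a named Block;
a nullptr entry means the column has not been read at this step. Filtering
therefore applies one shared IColumn::Filter to every present column, as in
the new filterColumns(). A standalone sketch of that pattern, restated here
for clarity (not a new API):

    using namespace DB;

    void filterPresentColumns(Columns & columns, const IColumn::Filter & filter)
    {
        for (auto & column : columns)
        {
            if (!column)
                continue;   /// Not read at this step: nothing to filter.

            /// -1: no hint about the result size.
            column = column->filter(filter, -1);

            if (column->empty())
            {
                /// All rows filtered out: drop the columns entirely; the
                /// caller recovers the row count from the filter itself.
                columns.clear();
                break;
            }
        }
    }

When every column is filtered away, the remaining row count is recomputed via
countBytesInFilter(), which is why executePrewhereActionsAndFilterColumns()
caches that value behind the getNumBytesInFilter lambda.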
--- .../MergeTreeBaseSelectBlockInputStream.cpp | 36 +-- .../MergeTree/MergeTreeRangeReader.cpp | 257 +++++++++++------- .../Storages/MergeTree/MergeTreeRangeReader.h | 19 +- .../Storages/MergeTree/MergeTreeReader.cpp | 189 +++++++------ dbms/src/Storages/MergeTree/MergeTreeReader.h | 28 +- 5 files changed, 296 insertions(+), 233 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp index 077e3ea0712..731624d1997 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp @@ -158,10 +158,10 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() auto read_result = task->range_reader.read(rows_to_read, task->mark_ranges); /// All rows were filtered. Repeat. - if (read_result.block.rows() == 0) - read_result.block.clear(); + if (read_result.num_rows == 0) + read_result.columns.clear(); - UInt64 num_filtered_rows = read_result.numReadRows() - read_result.block.rows(); + UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows; /// TODO /// progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); @@ -170,21 +170,11 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() { task->size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows); - if (read_result.block) - task->size_predictor->update(read_result.block); + if (!read_result.columns.empty()) + task->size_predictor->update(read_result.columns); } - if (read_result.block && prewhere_info && !task->remove_prewhere_column) - { - /// Convert const column to full here because it's cheaper to filter const column than full. - auto & column = read_result.block.getByName(prewhere_info->prewhere_column_name); - column.column = column.column->convertToFullColumnIfConst(); - } - - UInt64 num_rows = read_result.columns.empty() ? 
0 - : read_result.columns[0]->size(); - - return Chunk(std::move(read_result.columns), num_rows); + return Chunk(std::move(read_result.columns), read_result.num_rows); } @@ -208,9 +198,9 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer throw Exception("Cannot insert virtual columns to non-empty chunk without specified task.", ErrorCodes::LOGICAL_ERROR); - for (const auto & virt_column_name : virtual_columns) + for (const auto & virtual_column_name : virtual_columns) { - if (virt_column_name == "_part") + if (virtual_column_name == "_part") { ColumnPtr column; if (rows) @@ -218,9 +208,9 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virt_column_name); + callback.template insert(column, virtual_column_name); } - else if (virt_column_name == "_part_index") + else if (virtual_column_name == "_part_index") { ColumnPtr column; if (rows) @@ -228,9 +218,9 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeUInt64().createColumn(); - callback.template insert(column, virt_column_name); + callback.template insert(column, virtual_column_name); } - else if (virt_column_name == "_partition_id") + else if (virtual_column_name == "_partition_id") { ColumnPtr column; if (rows) @@ -238,7 +228,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virt_column_name); + callback.template insert(column, virtual_column_name); } } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 932721eb028..99d83789f45 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -27,11 +27,11 @@ size_t MergeTreeRangeReader::DelayedStream::position() const return num_rows_before_current_mark + current_offset + num_delayed_rows; } -size_t MergeTreeRangeReader::DelayedStream::readRows(Block & block, size_t num_rows) +size_t MergeTreeRangeReader::DelayedStream::readRows(Columns & columns, size_t num_rows) { if (num_rows) { - size_t rows_read = merge_tree_reader->readRows(current_mark, continue_reading, num_rows, block); + size_t rows_read = merge_tree_reader->readRows(current_mark, continue_reading, num_rows, columns); continue_reading = true; /// Zero rows_read maybe either because reading has finished @@ -47,7 +47,7 @@ size_t MergeTreeRangeReader::DelayedStream::readRows(Block & block, size_t num_r return 0; } -size_t MergeTreeRangeReader::DelayedStream::read(Block & block, size_t from_mark, size_t offset, size_t num_rows) +size_t MergeTreeRangeReader::DelayedStream::read(Columns & columns, size_t from_mark, size_t offset, size_t num_rows) { size_t num_rows_before_from_mark = index_granularity->getMarkStartingRow(from_mark); /// We already stand accurately in required position, @@ -60,7 +60,7 @@ size_t MergeTreeRangeReader::DelayedStream::read(Block & block, size_t from_mark } else { - size_t read_rows = finalize(block); + size_t read_rows = finalize(columns); continue_reading = false; current_mark = from_mark; @@ -71,7 +71,7 @@ size_t MergeTreeRangeReader::DelayedStream::read(Block & block, size_t from_mark } } -size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) +size_t MergeTreeRangeReader::DelayedStream::finalize(Columns & columns) { /// We need to 
skip some rows before reading if (current_offset && !continue_reading) @@ -89,13 +89,14 @@ size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) } - /// Skip some rows from beging of granule + /// Skip some rows from begin of granule. /// We don't know size of rows in compressed granule, - /// so have to read them and throw out + /// so have to read them and throw out. if (current_offset) { - Block temp_block; - readRows(temp_block, current_offset); + Columns tmp_columns; + tmp_columns.resize(columns.size()); + readRows(tmp_columns, current_offset); } } @@ -103,7 +104,7 @@ size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) current_offset += num_delayed_rows; num_delayed_rows = 0; - return readRows(block, rows_to_read); + return readRows(columns, rows_to_read); } @@ -138,9 +139,9 @@ void MergeTreeRangeReader::Stream::checkEnoughSpaceInCurrentGranule(size_t num_r throw Exception("Cannot read from granule more than index_granularity.", ErrorCodes::LOGICAL_ERROR); } -size_t MergeTreeRangeReader::Stream::readRows(Block & block, size_t num_rows) +size_t MergeTreeRangeReader::Stream::readRows(Columns & columns, size_t num_rows) { - size_t rows_read = stream.read(block, current_mark, offset_after_current_mark, num_rows); + size_t rows_read = stream.read(columns, current_mark, offset_after_current_mark, num_rows); if (stream.isFinished()) finish(); @@ -163,7 +164,7 @@ void MergeTreeRangeReader::Stream::toNextMark() offset_after_current_mark = 0; } -size_t MergeTreeRangeReader::Stream::read(Block & block, size_t num_rows, bool skip_remaining_rows_in_current_granule) +size_t MergeTreeRangeReader::Stream::read(Columns & columns, size_t num_rows, bool skip_remaining_rows_in_current_granule) { checkEnoughSpaceInCurrentGranule(num_rows); @@ -171,7 +172,7 @@ size_t MergeTreeRangeReader::Stream::read(Block & block, size_t num_rows, bool s { checkNotFinished(); - size_t read_rows = readRows(block, num_rows); + size_t read_rows = readRows(columns, num_rows); offset_after_current_mark += num_rows; @@ -212,9 +213,9 @@ void MergeTreeRangeReader::Stream::skip(size_t num_rows) } } -size_t MergeTreeRangeReader::Stream::finalize(Block & block) +size_t MergeTreeRangeReader::Stream::finalize(Columns & columns) { - size_t read_rows = stream.finalize(block); + size_t read_rows = stream.finalize(columns); if (stream.isFinished()) finish(); @@ -223,10 +224,10 @@ size_t MergeTreeRangeReader::Stream::finalize(Block & block) } -void MergeTreeRangeReader::ReadResult::addGranule(size_t num_rows) +void MergeTreeRangeReader::ReadResult::addGranule(size_t num_rows_) { - rows_per_granule.push_back(num_rows); - total_rows_per_granule += num_rows; + rows_per_granule.push_back(num_rows_); + total_rows_per_granule += num_rows_; } void MergeTreeRangeReader::ReadResult::adjustLastGranule() @@ -353,13 +354,13 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con zero16))) | (static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos + 16)), - zero16))) << 16) + zero16))) << 16u) | (static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos + 32)), - zero16))) << 32) + zero16))) << 32u) | (static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos + 48)), - zero16))) << 48); + zero16))) << 48u); if (val == 0) count += 64; else @@ -412,7 +413,7 @@ MergeTreeRangeReader::MergeTreeRangeReader( bool always_reorder_, bool remove_prewhere_column_, bool last_reader_in_chain_) : 
merge_tree_reader(merge_tree_reader_), index_granularity(&(merge_tree_reader->data_part->index_granularity)) , prev_reader(prev_reader_), prewhere_column_name(prewhere_column_name_) - , ordered_names(ordered_names_), alias_actions(alias_actions_), prewhere_actions(std::move(prewhere_actions_)) + , ordered_names(ordered_names_), alias_actions(std::move(alias_actions_)), prewhere_actions(std::move(prewhere_actions_)) , always_reorder(always_reorder_), remove_prewhere_column(remove_prewhere_column_) , last_reader_in_chain(last_reader_in_chain_), is_initialized(true) { @@ -476,92 +477,100 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar ReadResult read_result; size_t prev_bytes = 0; - bool should_reorder = false; if (prev_reader) { read_result = prev_reader->read(max_rows, ranges); - prev_bytes = read_result.block.bytes(); - Block block = continueReadingChain(read_result); + prev_bytes = read_result.numBytesRead(); + + size_t num_read_rows; + Columns columns = continueReadingChain(read_result, num_read_rows); + + /// Nothing to do. Return empty result. + if (read_result.num_rows == 0) + return read_result; + + bool has_columns = false; + for (auto & column : columns) + if (column) + has_columns = true; bool should_evaluate_missing_defaults = false; - if (block) + + if (has_columns) { - /// block.rows() <= read_result.block. We must filter block before adding columns to read_result.block + /// num_read_rows >= read_result.num_rows + /// We must filter block before adding columns to read_result.block /// Fill missing columns before filtering because some arrays from Nested may have empty data. - merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, block.rows()); + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_read_rows); if (read_result.getFilter()) - filterBlock(block, read_result.getFilter()->getData()); + filterColumns(columns, read_result.getFilter()->getData()); } else { - size_t num_rows = read_result.block.rows(); - if (!read_result.block) - { - if (auto * filter = read_result.getFilter()) - num_rows = countBytesInFilter(filter->getData()); /// All columns were removed and filter is not always true. - else if (read_result.totalRowsPerGranule()) - num_rows = read_result.numReadRows(); /// All columns were removed and filter is always true. - /// else filter is always false. - } + size_t num_rows = read_result.num_rows; /// If block is empty, we still may need to add missing columns. /// In that case use number of rows in result block and don't filter block. 
if (num_rows) - merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, num_rows); + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); } - for (auto i : ext::range(0, block.columns())) - read_result.block.insert(std::move(block.getByPosition(i))); + read_result.columns.reserve(read_result.columns.size() + columns.size()); + for (auto & column : columns) + read_result.columns.emplace_back(std::move(column)); - if (read_result.block) + if (!read_result.columns.empty()) { if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.block); + merge_tree_reader->evaluateMissingDefaults(read_result.columns); } } else { read_result = startReadingChain(max_rows, ranges); - if (read_result.block) + read_result.num_rows = read_result.numReadRows(); + + if (read_result.num_rows) { bool should_evaluate_missing_defaults; - merge_tree_reader->fillMissingColumns(read_result.block, should_reorder, should_evaluate_missing_defaults, - read_result.block.rows()); + merge_tree_reader->fillMissingColumns(read_result.columns, should_evaluate_missing_defaults, + read_result.num_rows); if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.block); + merge_tree_reader->evaluateMissingDefaults(read_result.columns); } + else + read_result.columns.clear(); } - if (!read_result.block) + if (read_result.num_rows == 0) return read_result; - read_result.addNumBytesRead(read_result.block.bytes() - prev_bytes); + size_t total_bytes = 0; + for (auto & column : read_result.columns) + total_bytes += column->byteSize(); + + read_result.addNumBytesRead(total_bytes - prev_bytes); executePrewhereActionsAndFilterColumns(read_result); - if (last_reader_in_chain && (should_reorder || always_reorder)) - merge_tree_reader->reorderColumns(read_result.block, *ordered_names, prewhere_column_name); - return read_result; } -void MergeTreeRangeReader::filterBlock(Block & block, const IColumn::Filter & filter) const +void MergeTreeRangeReader::filterColumns(Columns & columns, const IColumn::Filter & filter) const { - for (const auto i : ext::range(0, block.columns())) + for (auto & column : columns) { - auto & col = block.getByPosition(i); - - if (col.column) + if (column) { - col.column = col.column->filter(filter, -1); + column = column->filter(filter, -1); - if (col.column->empty()) + if (column->empty()) { - block.clear(); + columns.clear(); return; } } @@ -571,6 +580,7 @@ void MergeTreeRangeReader::filterBlock(Block & block, const IColumn::Filter & fi MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges) { ReadResult result; + result.columns.resize(merge_tree_reader->getColumns().size()); /// Stream is lazy. result.num_added_rows is the number of rows added to block which is not equal to /// result.num_rows_read until call to stream.finalize(). 
Also result.num_added_rows may be less than @@ -581,7 +591,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t { if (stream.isFinished()) { - result.addRows(stream.finalize(result.block)); + result.addRows(stream.finalize(result.columns)); stream = Stream(ranges.back().begin, ranges.back().end, merge_tree_reader); result.addRange(ranges.back()); ranges.pop_back(); @@ -589,13 +599,13 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t auto rows_to_read = std::min(space_left, stream.numPendingRowsInCurrentGranule()); bool last = rows_to_read == space_left; - result.addRows(stream.read(result.block, rows_to_read, !last)); + result.addRows(stream.read(result.columns, rows_to_read, !last)); result.addGranule(rows_to_read); space_left -= rows_to_read; } } - result.addRows(stream.finalize(result.block)); + result.addRows(stream.finalize(result.columns)); /// Last granule may be incomplete. result.adjustLastGranule(); @@ -603,22 +613,24 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t return result; } -Block MergeTreeRangeReader::continueReadingChain(ReadResult & result) +Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & num_rows) { - Block block; + Columns columns; + num_rows = 0; if (result.rowsPerGranule().empty()) { /// If zero rows were read on prev step, than there is no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually. stream.finish(); - return block; + return columns; } + columns.resize(merge_tree_reader->getColumns().size()); + auto & rows_per_granule = result.rowsPerGranule(); auto & started_ranges = result.startedRanges(); - size_t added_rows = 0; size_t next_range_to_start = 0; auto size = rows_per_granule.size(); @@ -627,25 +639,25 @@ Block MergeTreeRangeReader::continueReadingChain(ReadResult & result) if (next_range_to_start < started_ranges.size() && i == started_ranges[next_range_to_start].num_granules_read_before_start) { - added_rows += stream.finalize(block); + num_rows += stream.finalize(columns); auto & range = started_ranges[next_range_to_start].range; ++next_range_to_start; stream = Stream(range.begin, range.end, merge_tree_reader); } bool last = i + 1 == size; - added_rows += stream.read(block, rows_per_granule[i], !last); + num_rows += stream.read(columns, rows_per_granule[i], !last); } stream.skip(result.numRowsToSkipInLastGranule()); - added_rows += stream.finalize(block); + num_rows += stream.finalize(columns); /// added_rows may be zero if all columns were read in prewhere and it's ok. 
- if (added_rows && added_rows != result.totalRowsPerGranule()) - throw Exception("RangeReader read " + toString(added_rows) + " rows, but " + if (num_rows && num_rows != result.totalRowsPerGranule()) + throw Exception("RangeReader read " + toString(num_rows) + " rows, but " + toString(result.totalRowsPerGranule()) + " expected.", ErrorCodes::LOGICAL_ERROR); - return block; + return columns; } void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) @@ -653,14 +665,38 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r if (!prewhere_actions) return; - if (alias_actions) - alias_actions->execute(result.block); + auto & header = merge_tree_reader->getColumns(); + size_t num_columns = header.size(); - prewhere_actions->execute(result.block); - auto & prewhere_column = result.block.getByName(*prewhere_column_name); - size_t prev_rows = result.block.rows(); - ColumnPtr filter = prewhere_column.column; - prewhere_column.column = nullptr; + if (result.columns.size() != num_columns) + throw Exception("Invalid number of columns passed to MergeTreeRangeReader. " + "Expected " + toString(num_columns) + ", " + "got " + toString(result.columns.size()), ErrorCodes::LOGICAL_ERROR); + + ColumnPtr filter; + size_t prewhere_column_pos; + + { + /// Restore block from columns list. + Block block; + auto name_and_type = header.begin(); + for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) + block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); + + if (alias_actions) + alias_actions->execute(block); + + prewhere_actions->execute(block); + + prewhere_column_pos = block.getPositionByName(*prewhere_column_name); + + result.columns.clear(); + result.columns.resize(block.columns()); + for (auto & col : block) + result.columns.emplace_back(std::move(col.column)); + + filter.swap(result.columns[prewhere_column_pos]); + } if (result.getFilter()) { @@ -677,46 +713,57 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r bool filter_always_true = !result.getFilter() && result.totalRowsPerGranule() == filter->size(); if (result.totalRowsPerGranule() == 0) - result.block.clear(); + { + result.columns.clear(); + result.num_rows = 0; + } else if (!filter_always_true) { FilterDescription filter_description(*filter); + size_t num_bytes_in_filter = 0; + bool calculated_num_bytes_in_filter = false; + + auto getNumBytesInFilter = [&]() + { + if (!calculated_num_bytes_in_filter) + num_bytes_in_filter = countBytesInFilter(*filter_description.data); + + calculated_num_bytes_in_filter = true; + return num_bytes_in_filter; + }; + if (last_reader_in_chain) { - size_t num_bytes_in_filter = countBytesInFilter(*filter_description.data); - if (num_bytes_in_filter == 0) - result.block.clear(); - else if (num_bytes_in_filter == filter->size()) + size_t bytes_in_filter = getNumBytesInFilter(); + if (bytes_in_filter == 0) + { + result.columns.clear(); + result.num_rows = 0; + } + else if (bytes_in_filter == filter->size()) filter_always_true = true; } if (!filter_always_true) - filterBlock(result.block, *filter_description.data); + { + filterColumns(result.columns, *filter_description.data); + + if (result.columns.empty()) + result.num_rows = getNumBytesInFilter(); + else + result.num_rows = result.columns[0]->size(); + } } - if (!result.block) + if (result.num_rows == 0) return; - auto getNumRows = [&]() - { - /// If block has single column, it's filter. 
We need to count bytes in it in order to get the number of rows. - if (result.block.columns() > 1) - return result.block.rows(); - else if (result.getFilter()) - return countBytesInFilter(result.getFilter()->getData()); - else - return prev_rows; - }; - if (remove_prewhere_column) - result.block.erase(*prewhere_column_name); + result.columns.erase(result.columns.begin() + prewhere_column_pos); else - prewhere_column.column = prewhere_column.type->createColumnConst(getNumRows(), 1u); - - /// If block is empty, create column in order to store rows number. - if (last_reader_in_chain && result.block.columns() == 0) - result.block.insert({ColumnNothing::create(getNumRows()), std::make_shared(), "_nothing"}); + result.columns[prewhere_column_pos] = + DataTypeUInt8().createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index 4261509d7fc..d3f1333289b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -47,10 +47,10 @@ public: /// Returns the number of rows added to block. /// NOTE: have to return number of rows because block has broken invariant: /// some columns may have different size (for example, default columns may be zero size). - size_t read(Block & block, size_t from_mark, size_t offset, size_t num_rows); + size_t read(Columns & columns, size_t from_mark, size_t offset, size_t num_rows); /// Skip extra rows to current_offset and perform actual reading - size_t finalize(Block & block); + size_t finalize(Columns & columns); bool isFinished() const { return is_finished; } @@ -69,7 +69,7 @@ public: /// Current position from the begging of file in rows size_t position() const; - size_t readRows(Block & block, size_t num_rows); + size_t readRows(Columns & columns, size_t num_rows); }; /// Very thin wrapper for DelayedStream @@ -81,8 +81,8 @@ public: Stream(size_t from_mark, size_t to_mark, MergeTreeReader * merge_tree_reader); /// Returns the number of rows added to block. - size_t read(Block & block, size_t num_rows, bool skip_remaining_rows_in_current_granule); - size_t finalize(Block & block); + size_t read(Columns & columns, size_t num_rows, bool skip_remaining_rows_in_current_granule); + size_t finalize(Columns & columns); void skip(size_t num_rows); void finish() { current_mark = last_mark; } @@ -112,7 +112,7 @@ public: void checkNotFinished() const; void checkEnoughSpaceInCurrentGranule(size_t num_rows) const; - size_t readRows(Block & block, size_t num_rows); + size_t readRows(Columns & columns, size_t num_rows); void toNextMark(); }; @@ -143,7 +143,7 @@ public: /// Filter you need to apply to newly-read columns in order to add them to block. 
const ColumnUInt8 * getFilter() const { return filter; } - void addGranule(size_t num_rows); + void addGranule(size_t num_rows_); void adjustLastGranule(); void addRows(size_t rows) { num_read_rows += rows; } void addRange(const MarkRange & range) { started_ranges.push_back({rows_per_granule.size(), range}); } @@ -158,6 +158,7 @@ public: void addNumBytesRead(size_t count) { num_bytes_read += count; } Columns columns; + size_t num_rows = 0; private: RangesInfo started_ranges; @@ -187,9 +188,9 @@ public: private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); - Block continueReadingChain(ReadResult & result); + Columns continueReadingChain(ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result); - void filterBlock(Block & block, const IColumn::Filter & filter) const; + void filterColumns(Columns & columns, const IColumn::Filter & filter) const; MergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index d9732c8ac6f..cab963d0f66 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB @@ -31,16 +30,30 @@ namespace ErrorCodes MergeTreeReader::~MergeTreeReader() = default; -MergeTreeReader::MergeTreeReader(const String & path_, - const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_, - UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, bool save_marks_in_cache_, - const MergeTreeData & storage_, const MarkRanges & all_mark_ranges_, - size_t aio_threshold_, size_t max_read_buffer_size_, const ValueSizeMap & avg_value_size_hints_, +MergeTreeReader::MergeTreeReader( + String path_, + MergeTreeData::DataPartPtr data_part_, + NamesAndTypesList columns_, + UncompressedCache * uncompressed_cache_, + MarkCache * mark_cache_, + bool save_marks_in_cache_, + const MergeTreeData & storage_, + MarkRanges all_mark_ranges_, + size_t aio_threshold_, + size_t max_read_buffer_size_, + ValueSizeMap avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) - : data_part(data_part_), avg_value_size_hints(avg_value_size_hints_), path(path_), columns(columns_) - , uncompressed_cache(uncompressed_cache_), mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_), storage(storage_) - , all_mark_ranges(all_mark_ranges_), aio_threshold(aio_threshold_), max_read_buffer_size(max_read_buffer_size_) + : data_part(std::move(data_part_)) + , avg_value_size_hints(std::move(avg_value_size_hints_)) + , path(std::move(path_)), columns(std::move(columns_)) + , uncompressed_cache(uncompressed_cache_) + , mark_cache(mark_cache_) + , save_marks_in_cache(save_marks_in_cache_) + , storage(storage_) + , all_mark_ranges(std::move(all_mark_ranges_)) + , aio_threshold(aio_threshold_) + , max_read_buffer_size(max_read_buffer_size_) { try { @@ -61,34 +74,44 @@ const MergeTreeReader::ValueSizeMap & MergeTreeReader::getAvgValueSizeHints() co } -size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res) +size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns) { size_t read_rows = 0; try { + size_t num_columns = columns.size(); + + if (res_columns.size() != num_columns) 
+ throw Exception("invalid number of columns passed to MergeTreeReader::readRows. " + "Expected " + toString(num_columns) + ", " + "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR); + /// Pointers to offset columns that are common to the nested data structure columns. /// If append is true, then the value will be equal to nullptr and will be used only to /// check that the offsets column has been already read. OffsetColumns offset_columns; - for (const NameAndTypePair & it : columns) + auto name_and_type = columns.begin(); + for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) { + auto & [name, type] = *name_and_type; + /// The column is already present in the block so we will append the values to the end. - bool append = res.has(it.name); + bool append = res_columns[pos] != nullptr; if (!append) - res.insert(ColumnWithTypeAndName(it.type->createColumn(), it.type, it.name)); + res_columns[pos] = name_and_type->type->createColumn(); /// To keep offsets shared. TODO Very dangerous. Get rid of this. - MutableColumnPtr column = res.getByName(it.name).column->assumeMutable(); + MutableColumnPtr column = res_columns[pos]->assumeMutable(); bool read_offsets = true; /// For nested data structures collect pointers to offset columns. - if (const DataTypeArray * type_arr = typeid_cast(it.type.get())) + if (const auto * type_arr = typeid_cast(type.get())) { - String name = Nested::extractTableName(it.name); + String table_name = Nested::extractTableName(name); - auto it_inserted = offset_columns.emplace(name, nullptr); + auto it_inserted = offset_columns.emplace(table_name, nullptr); /// offsets have already been read on the previous iteration and we don't need to read it again if (!it_inserted.second) @@ -108,27 +131,28 @@ size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t { size_t column_size_before_reading = column->size(); - readData(it.name, *it.type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets); + readData(name, *type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets); /// For elements of Nested, column_size_before_reading may be greater than column size /// if offsets are not empty and were already read, but elements are empty. - if (column->size()) + if (!column->empty()) read_rows = std::max(read_rows, column->size() - column_size_before_reading); } catch (Exception & e) { /// Better diagnostics. - e.addMessage("(while reading column " + it.name + ")"); + e.addMessage("(while reading column " + name + ")"); throw; } - if (column->size()) - res.getByName(it.name).column = std::move(column); + if (column->empty()) + res_columns[pos] = nullptr; else - res.erase(it.name); + res_columns[pos] = std::move(column); } - /// NOTE: positions for all streams must be kept in sync. In particular, even if for some streams there are no rows to be read, + /// NOTE: positions for all streams must be kept in sync. + /// In particular, even if for some streams there are no rows to be read, /// you must ensure that no seeks are skipped and at this point they all point to to_mark. } catch (Exception & e) @@ -137,7 +161,9 @@ size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t storage.reportBrokenPart(data_part->name); /// Better diagnostics. 
-        e.addMessage("(while reading from part " + path + " from mark " + toString(from_mark) + " with max_rows_to_read = " + toString(max_rows_to_read) + ")");
+        e.addMessage("(while reading from part " + path + " "
+                     "from mark " + toString(from_mark) + " "
+                     "with max_rows_to_read = " + toString(max_rows_to_read) + ")");
         throw;
     }
     catch (...)
@@ -235,7 +261,7 @@ void MergeTreeReader::readData(
 
 static bool arrayHasNoElementsRead(const IColumn & column)
 {
-    const ColumnArray * column_array = typeid_cast<const ColumnArray *>(&column);
+    const auto * column_array = typeid_cast<const ColumnArray *>(&column);
     if (!column_array)
         return false;
 
@@ -253,22 +279,31 @@ static bool arrayHasNoElementsRead(const IColumn & column)
 }
 
 
-void MergeTreeReader::fillMissingColumns(Block & res, bool & should_reorder, bool & should_evaluate_missing_defaults, size_t num_rows)
+void MergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows)
 {
     try
     {
+        size_t num_columns = columns.size();
+
+        if (res_columns.size() != num_columns)
+            throw Exception("invalid number of columns passed to MergeTreeReader::fillMissingColumns. "
+                            "Expected " + toString(num_columns) + ", "
+                            "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR);
+
         /// For a missing column of a nested data structure we must create not a column of empty
         /// arrays, but a column of arrays of correct length.
 
         /// First, collect offset columns for all arrays in the block.
         OffsetColumns offset_columns;
-        for (size_t i = 0; i < res.columns(); ++i)
+        auto requested_column = columns.begin();
+        for (size_t i = 0; i < num_columns; ++i, ++requested_column)
         {
-            const ColumnWithTypeAndName & column = res.safeGetByPosition(i);
+            if (res_columns[i] == nullptr)
+                continue;
 
-            if (const ColumnArray * array = typeid_cast<const ColumnArray *>(column.column.get()))
+            if (const auto * array = typeid_cast<const ColumnArray *>(res_columns[i].get()))
             {
-                String offsets_name = Nested::extractTableName(column.name);
+                String offsets_name = Nested::extractTableName(requested_column->name);
                 auto & offsets_column = offset_columns[offsets_name];
 
                 /// If for some reason multiple offsets columns are present for the same nested data structure,
@@ -279,54 +314,43 @@ void MergeTreeReader::fillMissingColumns(Block & res, bool & should_reorder, boo
         }
 
         should_evaluate_missing_defaults = false;
-        should_reorder = false;
 
        /// insert default values only for columns without default expressions
-        for (const auto & requested_column : columns)
+        requested_column = columns.begin();
+        for (size_t i = 0; i < num_columns; ++i, ++requested_column)
         {
-            bool has_column = res.has(requested_column.name);
-            if (has_column)
-            {
-                const auto & col = *res.getByName(requested_column.name).column;
-                if (arrayHasNoElementsRead(col))
-                {
-                    res.erase(requested_column.name);
-                    has_column = false;
-                }
-            }
+            auto & [name, type] = *requested_column;
 
-            if (!has_column)
+            if (res_columns[i] && arrayHasNoElementsRead(*res_columns[i]))
+                res_columns[i] = nullptr;
+
+            if (res_columns[i] == nullptr)
             {
-                should_reorder = true;
-                if (storage.getColumns().hasDefault(requested_column.name))
+                if (storage.getColumns().hasDefault(name))
                 {
                     should_evaluate_missing_defaults = true;
                     continue;
                 }
 
-                ColumnWithTypeAndName column_to_add;
-                column_to_add.name = requested_column.name;
-                column_to_add.type = requested_column.type;
-
-                String offsets_name = Nested::extractTableName(column_to_add.name);
-                if (offset_columns.count(offsets_name))
+                String offsets_name = Nested::extractTableName(name);
+                auto offset_it = offset_columns.find(offsets_name);
+                if (offset_it !=
offset_columns.end())
                 {
-                    ColumnPtr offsets_column = offset_columns[offsets_name];
-                    DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*column_to_add.type).getNestedType();
+                    ColumnPtr offsets_column = offset_it->second;
+                    DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType();
                     size_t nested_rows = typeid_cast<const ColumnUInt64 &>(*offsets_column).getData().back();
 
-                    ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
+                    ColumnPtr nested_column =
+                        nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
 
-                    column_to_add.column = ColumnArray::create(nested_column, offsets_column);
+                    res_columns[i] = ColumnArray::create(nested_column, offsets_column);
                 }
                 else
                 {
-                    /// We must turn a constant column into a full column because the interpreter could infer that it is constant everywhere
-                    /// but in some blocks (from other parts) it can be a full column.
-                    column_to_add.column = column_to_add.type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
+                    /// We must turn a constant column into a full column because the interpreter could infer
+                    /// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
+                    res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
                 }
-
-                res.insert(std::move(column_to_add));
             }
         }
     }
@@ -338,34 +362,35 @@ void MergeTreeReader::fillMissingColumns(Block & res, bool & should_reorder, boo
     }
 }
 
-void MergeTreeReader::reorderColumns(Block & res, const Names & ordered_names, const String * filter_name)
+void MergeTreeReader::evaluateMissingDefaults(Columns & res_columns)
 {
     try
     {
-        Block ordered_block;
+        size_t num_columns = columns.size();
 
-        for (const auto & name : ordered_names)
-            if (res.has(name))
-                ordered_block.insert(res.getByName(name));
+        if (res_columns.size() != num_columns)
+            throw Exception("invalid number of columns passed to MergeTreeReader::evaluateMissingDefaults. "
+                            "Expected " + toString(num_columns) + ", "
+                            "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR);
 
-        if (filter_name && !ordered_block.has(*filter_name) && res.has(*filter_name))
-            ordered_block.insert(res.getByName(*filter_name));
+        /// Convert columns list to block.
+        /// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions.
+        Block block;
+        auto name_and_type = columns.begin();
+        for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
+        {
+            if (res_columns[pos] == nullptr)
+                continue;
 
-        std::swap(res, ordered_block);
-    }
-    catch (Exception & e)
-    {
-        /// Better diagnostics.
-        e.addMessage("(while reading from part " + path + ")");
-        throw;
-    }
-}
+            block.insert({res_columns[pos], name_and_type->type, name_and_type->name});
+        }
 
-void MergeTreeReader::evaluateMissingDefaults(Block & res)
-{
-    try
-    {
-        DB::evaluateMissingDefaults(res, columns, storage.getColumns().getDefaults(), storage.global_context);
+        DB::evaluateMissingDefaults(block, columns, storage.getColumns().getDefaults(), storage.global_context);
+
+        /// Move columns from block.
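+        /// (The defaults were evaluated on the temporary block; each resulting
+        /// column is moved back into res_columns by name below.)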
+        name_and_type = columns.begin();
+        for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
+            res_columns[pos] = std::move(block.getByName(name_and_type->name).column);
     }
     catch (Exception & e)
     {
diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h
index 25f4c9ddd32..367f1bbb530 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeReader.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h
@@ -19,14 +19,17 @@ public:
     using ValueSizeMap = std::map<std::string, double>;
     using DeserializeBinaryBulkStateMap = std::map<std::string, IDataType::DeserializeBinaryBulkStatePtr>;
 
-    MergeTreeReader(const String & path_, /// Path to the directory containing the part
-        const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_,
+    MergeTreeReader(String path_, /// Path to the directory containing the part
+        MergeTreeData::DataPartPtr data_part_,
+        NamesAndTypesList columns_,
         UncompressedCache * uncompressed_cache_,
         MarkCache * mark_cache_,
         bool save_marks_in_cache_,
-        const MergeTreeData & storage_, const MarkRanges & all_mark_ranges_,
-        size_t aio_threshold_, size_t max_read_buffer_size_,
-        const ValueSizeMap & avg_value_size_hints_ = ValueSizeMap{},
+        const MergeTreeData & storage_,
+        MarkRanges all_mark_ranges_,
+        size_t aio_threshold_,
+        size_t max_read_buffer_size_,
+        ValueSizeMap avg_value_size_hints_ = ValueSizeMap{},
         const ReadBufferFromFileBase::ProfileCallback & profile_callback_ = ReadBufferFromFileBase::ProfileCallback{},
         clockid_t clock_type_ = CLOCK_MONOTONIC_COARSE);
 
@@ -36,20 +39,17 @@ public:
 
     /// Add columns from ordered_names that are not present in the block.
     /// Missing columns are added in the order specified by ordered_names.
-    /// If at least one column was added, reorders all columns in the block according to ordered_names.
-    /// num_rows is needed in case block is empty.
-    void fillMissingColumns(Block & res, bool & should_reorder, bool & should_evaluate_missing_defaults, size_t num_rows);
-    /// Sort columns to ensure consistent order among all blocks.
-    /// If filter_name is not nullptr and block has filter column, move it to the end of block.
-    void reorderColumns(Block & res, const Names & ordered_names, const String * filter_name);
+    /// num_rows is needed in case all res_columns are nullptr.
+    void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows);
 
     /// Evaluate defaulted columns if necessary.
-    void evaluateMissingDefaults(Block & res);
+    void evaluateMissingDefaults(Columns & res_columns);
 
     const NamesAndTypesList & getColumns() const { return columns; }
 
     /// Returns the number of rows read, or zero if there are no columns to read.
-    /// If continue_reading is true, continue reading from last state, otherwise seek to from_mark
-    size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res);
+    /// If continue_reading is true, continue reading from the last state, otherwise seek to from_mark.
+    /// Fills res_columns in the order specified by the getColumns() list. If a column was not read, it will be nullptr.
+    size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns);
 
     MergeTreeData::DataPartPtr data_part;
 
From 01579296f13d4556b650368bd57cd4f6a9fbf202 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 25 Sep 2019 19:00:42 +0300
Subject: [PATCH 010/222] Link fixes.
--- docs/en/operations/table_engines/hdfs.md | 2 +- docs/ru/development/build_cross.md | 1 + docs/ru/operations/table_engines/hdfs.md | 2 +- docs/ru/query_language/agg_functions/parametric_functions.md | 2 +- docs/ru/query_language/functions/other_functions.md | 2 +- docs/ru/query_language/select.md | 2 +- docs/toc_en.yml | 1 + docs/toc_ru.yml | 1 + 8 files changed, 8 insertions(+), 5 deletions(-) create mode 120000 docs/ru/development/build_cross.md diff --git a/docs/en/operations/table_engines/hdfs.md b/docs/en/operations/table_engines/hdfs.md index 1f6ecc50a79..9e2947341bf 100644 --- a/docs/en/operations/table_engines/hdfs.md +++ b/docs/en/operations/table_engines/hdfs.md @@ -58,7 +58,7 @@ Multiple path components can have globs. For being processed file should exists - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Substitutes any number in range from N to M including both borders. - Constructions with `{}` are similar to the [remote table function](../../query_language/table_functions/remote.md)). + Constructions with `{}` are similar to the [remote](../../query_language/table_functions/remote.md) table function. **Example** diff --git a/docs/ru/development/build_cross.md b/docs/ru/development/build_cross.md new file mode 120000 index 00000000000..f595f252de3 --- /dev/null +++ b/docs/ru/development/build_cross.md @@ -0,0 +1 @@ +../../en/development/build_cross.md \ No newline at end of file diff --git a/docs/ru/operations/table_engines/hdfs.md b/docs/ru/operations/table_engines/hdfs.md index 303f0a07d19..b384eb3bf60 100644 --- a/docs/ru/operations/table_engines/hdfs.md +++ b/docs/ru/operations/table_engines/hdfs.md @@ -55,7 +55,7 @@ SELECT * FROM hdfs_engine_table LIMIT 2 - `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно. -Конструкция с `{}` аналогична табличной функции [remote](remote.md). +Конструкция с `{}` аналогична табличной функции [remote](../../query_language/table_functions/remote.md). **Пример** diff --git a/docs/ru/query_language/agg_functions/parametric_functions.md b/docs/ru/query_language/agg_functions/parametric_functions.md index 5adf20dfce5..b0ece3ced11 100644 --- a/docs/ru/query_language/agg_functions/parametric_functions.md +++ b/docs/ru/query_language/agg_functions/parametric_functions.md @@ -45,7 +45,7 @@ FROM ( └─────────────────────────────────────────────────────────────────────────┘ ``` -С помощью функции [bar](../other_functions.md#function-bar) можно визуализировать гистограмму, например: +С помощью функции [bar](../functions/other_functions.md#function-bar) можно визуализировать гистограмму, например: ```sql WITH histogram(5)(rand() % 100) AS hist diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md index 987840cac99..3cc56bb1217 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/query_language/functions/other_functions.md @@ -117,7 +117,7 @@ SELECT visibleWidth(NULL) Функция кидает исключение, если таблица не существует. Для элементов вложенной структуры данных функция проверяет существование столбца. Для самой же вложенной структуры данных функция возвращает 0. -## bar +## bar {#function-bar} Позволяет построить unicode-art диаграмму. 
diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index d206ba42c0b..61854066f32 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -92,7 +92,7 @@ FROM └───────────┴───────────┘ ``` -### Секция FROM +### Секция FROM {#select-from} Если секция FROM отсутствует, то данные будут читаться из таблицы `system.one`. Таблица `system.one` содержит ровно одну строку (то есть, эта таблица выполняет такую же роль, как таблица DUAL, которую можно найти в других СУБД). diff --git a/docs/toc_en.yml b/docs/toc_en.yml index dccd51f3cb1..b3a46303e49 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -210,6 +210,7 @@ nav: - 'Overview of ClickHouse Architecture': 'development/architecture.md' - 'How to Build ClickHouse on Linux': 'development/build.md' - 'How to Build ClickHouse on Mac OS X': 'development/build_osx.md' + - 'How to Build ClickHouse on Linux for Mac OS X': 'development/build_cross.md' - 'How to Write C++ code': 'development/style.md' - 'How to Run ClickHouse Tests': 'development/tests.md' - 'Third-Party Libraries Used': 'development/contrib.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index b21bcc838dc..98c7b27a746 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -211,6 +211,7 @@ nav: - 'Обзор архитектуры ClickHouse': 'development/architecture.md' - 'Как собрать ClickHouse на Linux': 'development/build.md' - 'Как собрать ClickHouse на Mac OS X': 'development/build_osx.md' + - 'Как собрать ClickHouse на Linux для Mac OS X': 'development/build_cross.md' - 'Как писать код на C++': 'development/style.md' - 'Как запустить тесты': 'development/tests.md' - 'Сторонние библиотеки': 'development/contrib.md' From b65fe57319073da8cd6769324aef9a1d6908686e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 26 Sep 2019 20:29:41 +0300 Subject: [PATCH 011/222] Update MergeTreeRangeReader. --- .../MergeTreeBaseSelectBlockInputStream.cpp | 10 +++++- .../MergeTree/MergeTreeBlockReadUtils.cpp | 36 +++++++++++-------- .../MergeTree/MergeTreeBlockReadUtils.h | 4 +-- .../MergeTree/MergeTreeRangeReader.cpp | 22 +++++++++--- .../Storages/MergeTree/MergeTreeRangeReader.h | 9 ++--- 5 files changed, 55 insertions(+), 26 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp index 731624d1997..1f899c6b592 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp @@ -161,6 +161,14 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() if (read_result.num_rows == 0) read_result.columns.clear(); + auto & sample_block = getPort().getHeader(); + if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) + throw Exception("Inconsistent number of columns got from MergeTreeRangeReader. " + "Have " + toString(sample_block.columns()) + " in sample block " + "and " + toString(read_result.columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR); + + /// TODO: check columns have the same types as in header. 
+
     UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows;
 
     /// TODO
@@ -171,7 +179,7 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl()
         task->size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows);
 
         if (!read_result.columns.empty())
-            task->size_predictor->update(read_result.columns);
+            task->size_predictor->update(sample_block, read_result.columns, read_result.num_rows);
     }
 
     return Chunk(std::move(read_result.columns), read_result.num_rows);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
index 7dc9a40e89a..920697f3c32 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
@@ -84,22 +84,25 @@ MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor(
 {
     number_of_rows_in_part = data_part->rows_count;
     /// Initialize with the sample block until update() is called.
-    initialize(sample_block, columns);
+    initialize(sample_block, {}, columns);
 }
 
 
-void MergeTreeBlockSizePredictor::initialize(const Block & sample_block, const Names & columns, bool from_update)
+void MergeTreeBlockSizePredictor::initialize(const Block & sample_block, const Columns & columns, const Names & names, bool from_update)
 {
     fixed_columns_bytes_per_row = 0;
     dynamic_columns_infos.clear();
 
     std::unordered_set<String> names_set;
     if (!from_update)
-        names_set.insert(columns.begin(), columns.end());
+        names_set.insert(names.begin(), names.end());
 
-    for (const auto & column_with_type_and_name : sample_block)
+    size_t num_columns = sample_block.columns();
+    for (size_t pos = 0; pos < num_columns; ++pos)
     {
+        const auto & column_with_type_and_name = sample_block.getByPosition(pos);
         const String & column_name = column_with_type_and_name.name;
-        const ColumnPtr & column_data = column_with_type_and_name.column;
+        const ColumnPtr & column_data = from_update ? columns[pos]
+                                                    : column_with_type_and_name.column;
 
         if (!from_update && !names_set.count(column_name))
             continue;
@@ -151,25 +154,30 @@ void MergeTreeBlockSizePredictor::startBlock()
 
 
 /// TODO: add last_read_row_in_part parameter to take into account gaps between adjacent ranges
-void MergeTreeBlockSizePredictor::update(const Block & block, double decay)
+void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay)
 {
+    if (columns.size() != sample_block.columns())
+        throw Exception("Inconsistent number of columns passed to MergeTreeBlockSizePredictor. "
+                        "Have " + toString(sample_block.columns()) + " in sample block "
+                        "and " + toString(columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR);
+
     if (!is_initialized_in_update)
     {
         /// Reinitialize with read block to update estimation for DEFAULT and MATERIALIZED columns without data.
- initialize(block, {}, true); + initialize(sample_block, columns, {}, true); is_initialized_in_update = true; } - size_t new_rows = block.rows(); - if (new_rows < block_size_rows) + + if (num_rows < block_size_rows) { - throw Exception("Updated block has less rows (" + toString(new_rows) + ") than previous one (" + toString(block_size_rows) + ")", + throw Exception("Updated block has less rows (" + toString(num_rows) + ") than previous one (" + toString(block_size_rows) + ")", ErrorCodes::LOGICAL_ERROR); } - size_t diff_rows = new_rows - block_size_rows; - block_size_bytes = new_rows * fixed_columns_bytes_per_row; + size_t diff_rows = num_rows - block_size_rows; + block_size_bytes = num_rows * fixed_columns_bytes_per_row; bytes_per_row_current = fixed_columns_bytes_per_row; - block_size_rows = new_rows; + block_size_rows = num_rows; /// Make recursive updates for each read row: v_{i+1} = (1 - decay) v_{i} + decay v_{target} /// Use sum of geometric sequence formula to update multiple rows: v{n} = (1 - decay)^n v_{0} + (1 - (1 - decay)^n) v_{target} @@ -179,7 +187,7 @@ void MergeTreeBlockSizePredictor::update(const Block & block, double decay) max_size_per_row_dynamic = 0; for (auto & info : dynamic_columns_infos) { - size_t new_size = block.getByName(info.name).column->byteSize(); + size_t new_size = columns[sample_block.getPositionByName(info.name)]->byteSize(); size_t diff_size = new_size - info.size_bytes; double local_bytes_per_row = static_cast(diff_size) / diff_rows; diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index a031255b3ab..19c6adbd9c7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -85,7 +85,7 @@ struct MergeTreeBlockSizePredictor void startBlock(); /// Updates statistic for more accurate prediction - void update(const Block & block, double decay = DECAY()); + void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = DECAY()); /// Return current block size (after update()) inline size_t getBlockSize() const @@ -148,7 +148,7 @@ protected: bool is_initialized_in_update = false; - void initialize(const Block & sample_block, const Names & columns, bool from_update = false); + void initialize(const Block & sample_block, const Columns & columns, const Names & names, bool from_update = false); public: diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 99d83789f45..8cac9fcfad8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include @@ -409,14 +408,27 @@ void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) MergeTreeRangeReader::MergeTreeRangeReader( MergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, ExpressionActionsPtr alias_actions_, ExpressionActionsPtr prewhere_actions_, - const String * prewhere_column_name_, const Names * ordered_names_, - bool always_reorder_, bool remove_prewhere_column_, bool last_reader_in_chain_) + const String * prewhere_column_name_, bool remove_prewhere_column_, bool last_reader_in_chain_) : merge_tree_reader(merge_tree_reader_), index_granularity(&(merge_tree_reader->data_part->index_granularity)) , prev_reader(prev_reader_), prewhere_column_name(prewhere_column_name_) - , 
ordered_names(ordered_names_), alias_actions(std::move(alias_actions_)), prewhere_actions(std::move(prewhere_actions_)) - , always_reorder(always_reorder_), remove_prewhere_column(remove_prewhere_column_) + , alias_actions(std::move(alias_actions_)), prewhere_actions(std::move(prewhere_actions_)) + , remove_prewhere_column(remove_prewhere_column_) , last_reader_in_chain(last_reader_in_chain_), is_initialized(true) { + if (prev_reader) + sample_block = prev_reader->getSampleBlock(); + + for (auto & name_and_type : merge_tree_reader->getColumns()) + sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + + if (alias_actions) + alias_actions->execute(sample_block, true); + + if (prewhere_actions) + prewhere_actions->execute(sample_block, true); + + if (remove_prewhere_column) + sample_block.erase(*prewhere_column_name); } bool MergeTreeRangeReader::isReadingFinished() const diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index d3f1333289b..67d5cbc3908 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -22,8 +22,7 @@ class MergeTreeRangeReader public: MergeTreeRangeReader(MergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, ExpressionActionsPtr alias_actions_, ExpressionActionsPtr prewhere_actions_, - const String * prewhere_column_name_, const Names * ordered_names_, - bool always_reorder_, bool remove_prewhere_column_, bool last_reader_in_chain_); + const String * prewhere_column_name_, bool remove_prewhere_column_, bool last_reader_in_chain_); MergeTreeRangeReader() = default; @@ -185,6 +184,8 @@ public: ReadResult read(size_t max_rows, MarkRanges & ranges); + const Block & getSampleBlock() const { return sample_block; } + private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); @@ -197,13 +198,13 @@ private: MergeTreeRangeReader * prev_reader = nullptr; /// If not nullptr, read from prev_reader firstly. const String * prewhere_column_name = nullptr; - const Names * ordered_names = nullptr; ExpressionActionsPtr alias_actions = nullptr; /// If not nullptr, calculate aliases. ExpressionActionsPtr prewhere_actions = nullptr; /// If not nullptr, calculate filter. Stream stream; - bool always_reorder = true; + Block sample_block; + bool remove_prewhere_column = false; bool last_reader_in_chain = false; bool is_initialized = false; From 1689576770a0909043b4cb72b964ded31425d557 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 1 Oct 2019 19:50:08 +0300 Subject: [PATCH 012/222] Update MergeTreeDataSelectExecutor. 
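
Make MergeTreeDataSelectExecutor return Pipes instead of BlockInputStreams:
a pipe is a vector of processors chained by connecting the output port of
one processor to the input port of the next. A minimal sketch of the wiring
pattern used throughout this patch (the helper name appendToPipe is
illustrative only; it is not part of the code):

    /// Append a single-input transform to a non-empty pipe: take the free
    /// output port of the current tail, connect it to the transform's
    /// input, and make the transform the new tail of the pipe.
    void appendToPipe(Processors & pipe, ProcessorPtr transform)
    {
        auto & output = pipe.back()->getOutputs().front();
        connect(output, transform->getInputs().front());
        pipe.emplace_back(std::move(transform));
    }

This is the shape in which sampling (FilterTransform), the _sample_factor
column (AddingConstColumnTransform) and PREWHERE column removal
(ExpressionTransform) are attached to each per-part source below.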
--- .../Transforms/AddingConstColumnTransform.h | 40 +++++ .../Transforms/ReverseTransform.cpp | 22 +++ .../Processors/Transforms/ReverseTransform.h | 17 ++ ...m.cpp => MergeTreeBaseSelectProcessor.cpp} | 54 +++--- ...tream.h => MergeTreeBaseSelectProcessor.h} | 6 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 163 +++++++++++++----- .../MergeTree/MergeTreeDataSelectExecutor.h | 2 +- .../Storages/MergeTree/MergeTreeReadPool.cpp | 2 +- ...pp => MergeTreeReverseSelectProcessor.cpp} | 90 +++++----- ...am.h => MergeTreeReverseSelectProcessor.h} | 16 +- ...tream.cpp => MergeTreeSelectProcessor.cpp} | 70 ++++---- ...putStream.h => MergeTreeSelectProcessor.h} | 13 +- .../MergeTreeSequentialBlockInputStream.cpp | 27 ++- ...geTreeThreadSelectBlockInputProcessor.cpp} | 18 +- ...ergeTreeThreadSelectBlockInputProcessor.h} | 8 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 13 +- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- .../src/Storages/StorageReplicatedMergeTree.h | 2 +- 18 files changed, 373 insertions(+), 192 deletions(-) create mode 100644 dbms/src/Processors/Transforms/AddingConstColumnTransform.h create mode 100644 dbms/src/Processors/Transforms/ReverseTransform.cpp create mode 100644 dbms/src/Processors/Transforms/ReverseTransform.h rename dbms/src/Storages/MergeTree/{MergeTreeBaseSelectBlockInputStream.cpp => MergeTreeBaseSelectProcessor.cpp} (84%) rename dbms/src/Storages/MergeTree/{MergeTreeBaseSelectBlockInputStream.h => MergeTreeBaseSelectProcessor.h} (93%) rename dbms/src/Storages/MergeTree/{MergeTreeReverseSelectBlockInputStream.cpp => MergeTreeReverseSelectProcessor.cpp} (80%) rename dbms/src/Storages/MergeTree/{MergeTreeReverseSelectBlockInputStream.h => MergeTreeReverseSelectProcessor.h} (82%) rename dbms/src/Storages/MergeTree/{MergeTreeSelectBlockInputStream.cpp => MergeTreeSelectProcessor.cpp} (85%) rename dbms/src/Storages/MergeTree/{MergeTreeSelectBlockInputStream.h => MergeTreeSelectProcessor.h} (85%) rename dbms/src/Storages/MergeTree/{MergeTreeThreadSelectBlockInputStream.cpp => MergeTreeThreadSelectBlockInputProcessor.cpp} (84%) rename dbms/src/Storages/MergeTree/{MergeTreeThreadSelectBlockInputStream.h => MergeTreeThreadSelectBlockInputProcessor.h} (83%) diff --git a/dbms/src/Processors/Transforms/AddingConstColumnTransform.h b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h new file mode 100644 index 00000000000..aea9ee392b5 --- /dev/null +++ b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h @@ -0,0 +1,40 @@ +#pragma once +#include + +namespace DB +{ + +/// Adds a materialized const column to the chunk with a specified value. 
+template +class AddingConstColumnTransform : public ISimpleTransform +{ +public: + AddingConstColumnTransform(const Block & header, DataTypePtr data_type_, T value_, const String & column_name_) + : ISimpleTransform(header, addColumn(header, data_type_, column_name_), false) + , data_type(std::move(data_type_)), value(value_) {} + + String getName() const override { return "AddingConstColumnTransform"; } + +protected: + void transform(Chunk & chunk) override + { + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + columns.emplace_back(data_type->createColumnConst(num_rows, value)->convertToFullColumnIfConst()); + + chunk.setColumns(std::move(columns), num_rows); + } + +private: + static Block addColumn(Block header, const DataTypePtr & data_type, const String & column_name) + { + header.insert({data_type->createColumn(), data_type, column_name}); + return header; + } + + DataTypePtr data_type; + T value; +}; + +} diff --git a/dbms/src/Processors/Transforms/ReverseTransform.cpp b/dbms/src/Processors/Transforms/ReverseTransform.cpp new file mode 100644 index 00000000000..eb2b39d26d1 --- /dev/null +++ b/dbms/src/Processors/Transforms/ReverseTransform.cpp @@ -0,0 +1,22 @@ +#include + +namespace DB +{ + +void ReverseTransform::transform(Chunk & chunk) +{ + IColumn::Permutation permutation; + + size_t num_rows = chunk.getNumRows(); + for (size_t i = 0; i < num_rows; ++i) + permutation.emplace_back(num_rows - 1 - i); + + auto columns = chunk.detachColumns(); + + for (auto & column : columns) + column = column->permute(permutation, 0); + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/dbms/src/Processors/Transforms/ReverseTransform.h b/dbms/src/Processors/Transforms/ReverseTransform.h new file mode 100644 index 00000000000..2e3eca25648 --- /dev/null +++ b/dbms/src/Processors/Transforms/ReverseTransform.h @@ -0,0 +1,17 @@ +#pragma once +#include + +namespace DB +{ + +class ReverseTransform : public ISimpleTransform +{ +public: + explicit ReverseTransform(const Block & header) : ISimpleTransform(header, header, false) {} + String getName() const override { return "ReverseTransform"; } + +protected: + void transform(Chunk & chunk) override; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp similarity index 84% rename from dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 1f899c6b592..0f03a2c8f57 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -19,7 +19,7 @@ namespace ErrorCodes } -MergeTreeBaseSelectBlockInputProcessor::MergeTreeBaseSelectBlockInputProcessor( +MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, @@ -47,7 +47,7 @@ MergeTreeBaseSelectBlockInputProcessor::MergeTreeBaseSelectBlockInputProcessor( } -Chunk MergeTreeBaseSelectBlockInputProcessor::generate() +Chunk MergeTreeBaseSelectProcessor::generate() { while (!isCancelled()) { @@ -67,7 +67,7 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::generate() } -void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeReadTask & current_task) +void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & 
current_task) { if (prewhere_info) { @@ -76,8 +76,8 @@ void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeRea current_task.range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, - &prewhere_info->prewhere_column_name, ¤t_task.ordered_names, - current_task.should_reorder, current_task.remove_prewhere_column, true); + &prewhere_info->prewhere_column_name, + current_task.remove_prewhere_column, true); } else { @@ -87,26 +87,26 @@ void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeRea current_task.pre_range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, - &prewhere_info->prewhere_column_name, ¤t_task.ordered_names, - current_task.should_reorder, current_task.remove_prewhere_column, false); + &prewhere_info->prewhere_column_name, + current_task.remove_prewhere_column, false); pre_reader_ptr = ¤t_task.pre_range_reader; } current_task.range_reader = MergeTreeRangeReader( reader.get(), pre_reader_ptr, nullptr, nullptr, - nullptr, ¤t_task.ordered_names, true, false, true); + nullptr, false, true); } } else { current_task.range_reader = MergeTreeRangeReader( reader.get(), nullptr, nullptr, nullptr, - nullptr, ¤t_task.ordered_names, current_task.should_reorder, false, true); + nullptr, false, true); } } -Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() +Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() { if (task->size_predictor) task->size_predictor->startBlock(); @@ -171,8 +171,7 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows; - /// TODO - /// progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); + /// TODO: progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); if (task->size_predictor) { @@ -182,11 +181,26 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() task->size_predictor->update(sample_block, read_result.columns, read_result.num_rows); } - return Chunk(std::move(read_result.columns), read_result.num_rows); + if (read_result.num_rows == 0) + return {}; + + auto & header = getPort().getHeader(); + Columns ordered_columns; + size_t num_virtual_columns = virt_column_names.size(); + ordered_columns.reserve(header.columns() - num_virtual_columns); + + /// Reorder columns. TODO: maybe skip for default case. 
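+    /// (sample_block lists columns in the order they were read and filtered,
+    /// while the output header expects the originally requested order, so each
+    /// header column is looked up in sample_block by name.)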
+ for (size_t ps = 0; ps + num_virtual_columns < header.columns(); ++ps) + { + auto pos_in_sample_block = sample_block.getPositionByName(header.getByPosition(ps).name); + ordered_columns.emplace_back(std::move(read_result.columns[pos_in_sample_block])); + } + + return Chunk(std::move(ordered_columns), read_result.num_rows); } -Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPart() +Chunk MergeTreeBaseSelectProcessor::readFromPart() { if (!task->range_reader.isInitialized()) initializeRangeReaders(*task); @@ -267,13 +281,13 @@ namespace }; } -void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns) +void MergeTreeBaseSelectProcessor::injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns) { InsertIntoBlockCallback callback { block }; injectVirtualColumnsImpl(block.rows(), callback, task, virtual_columns); } -void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns) +void MergeTreeBaseSelectProcessor::injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns) { UInt64 num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); @@ -284,7 +298,7 @@ void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Chunk & chunk, chunk.setColumns(columns, num_rows); } -void MergeTreeBaseSelectBlockInputProcessor::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) +void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) { if (prewhere_info) { @@ -300,7 +314,7 @@ void MergeTreeBaseSelectBlockInputProcessor::executePrewhereActions(Block & bloc } } -Block MergeTreeBaseSelectBlockInputProcessor::getHeader( +Block MergeTreeBaseSelectProcessor::getHeader( Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns) { executePrewhereActions(block, prewhere_info); @@ -309,6 +323,6 @@ Block MergeTreeBaseSelectBlockInputProcessor::getHeader( } -MergeTreeBaseSelectBlockInputProcessor::~MergeTreeBaseSelectBlockInputProcessor() = default; +MergeTreeBaseSelectProcessor::~MergeTreeBaseSelectProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h similarity index 93% rename from dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 0abbb2d001c..0197d481f13 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -16,10 +16,10 @@ class MarkCache; /// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream -class MergeTreeBaseSelectBlockInputProcessor : public ISource +class MergeTreeBaseSelectProcessor : public ISource { public: - MergeTreeBaseSelectBlockInputProcessor( + MergeTreeBaseSelectProcessor( Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, @@ -32,7 +32,7 @@ public: bool save_marks_in_cache_ = true, const Names & virt_column_names_ = {}); - ~MergeTreeBaseSelectBlockInputProcessor() override; + ~MergeTreeBaseSelectProcessor() override; static void executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 95f76a4c7f7..af410c6ed14 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -5,10 +5,10 @@
 
 #include
-#include
+#include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -53,7 +53,13 @@ namespace std
 #include
 #include
 #include
-
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 
 namespace ProfileEvents
@@ -624,18 +630,36 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
     }
 
     if (use_sampling)
-        for (auto & stream : res)
-            stream = std::make_shared<FilterBlockInputStream>(stream, filter_expression, filter_function->getColumnName());
+    {
+        for (auto & pipe : res)
+        {
+            auto & output = pipe.back()->getOutputs().front();
+            pipe.emplace_back(std::make_shared<FilterTransform>(output.getHeader(), filter_expression, filter_function->getColumnName(), false));
+            connect(output, pipe.back()->getInputs().front());
+        }
+    }
 
     /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
     if (sample_factor_column_queried)
-        for (auto & stream : res)
-            stream = std::make_shared<AddingConstColumnBlockInputStream<Float64>>(
-                stream, std::make_shared<DataTypeFloat64>(), used_sample_factor, "_sample_factor");
+    {
+        for (auto & pipe : res)
+        {
+            auto & output = pipe.back()->getOutputs().front();
+            pipe.emplace_back(std::make_shared<AddingConstColumnTransform<Float64>>(
+                output.getHeader(), std::make_shared<DataTypeFloat64>(), used_sample_factor, "_sample_factor"));
+            connect(output, pipe.back()->getInputs().front());
+        }
+    }
 
     if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions)
-        for (auto & stream : res)
-            stream = std::make_shared<ExpressionBlockInputStream>(stream, query_info.prewhere_info->remove_columns_actions);
+    {
+        for (auto & pipe : res)
+        {
+            auto & output = pipe.back()->getOutputs().front();
+            pipe.emplace_back(std::make_shared<ExpressionTransform>(
+                output.getHeader(), query_info.prewhere_info->remove_columns_actions));
+            connect(output, pipe.back()->getInputs().front());
+        }
+    }
 
     return res;
 }
@@ -724,15 +748,16 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
 
         for (size_t i = 0; i < num_streams; ++i)
        {
-            res.emplace_back(std::make_shared<MergeTreeThreadSelectBlockInputStream>(
+            res.push_back({std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
                 i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes,
                 settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache,
-                query_info.prewhere_info, settings, virt_columns));
+                query_info.prewhere_info, settings, virt_columns)});
 
             if (i == 0)
             {
                 /// Set the approximate number of rows for the first source only
-                res.front()->addTotalRowsApprox(total_rows);
+                /// TODO
+                /// res.front()->addTotalRowsApprox(total_rows);
             }
         }
     }
@@ -800,13 +825,13 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
                 parts.emplace_back(part);
             }
 
-            BlockInputStreamPtr source_stream = std::make_shared<MergeTreeSelectBlockInputStream>(
+            auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
                 data, part.data_part, max_block_size, settings.preferred_block_size_bytes,
                 settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part,
                 use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io,
                 settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query);
 
-            res.push_back(source_stream);
+            res.push_back({std::move(source_processor)});
         }
     }
 
@@ -865,10 +890,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
     if (sum_marks > max_marks_to_use_cache)
         use_uncompressed_cache = false;
 
-    BlockInputStreams streams;
+    Pipes pipes;
 
     if (sum_marks == 0)
-        return streams;
+        return
pipes; /// Let's split ranges to avoid reading much data. auto split_ranges = [rows_granularity = data_settings->index_granularity, max_block_size](const auto & ranges, int direction) @@ -922,7 +947,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( { size_t need_marks = min_marks_per_stream; - BlockInputStreams streams_per_thread; + std::vector streams_per_thread; + Processors pipe; /// Loop over parts. /// We will iteratively take part or some subrange of a part from the back @@ -982,27 +1008,29 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, sorting_info->direction); - BlockInputStreamPtr source_stream; if (sorting_info->direction == 1) { - source_stream = std::make_shared( + pipe.push_back({std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); } else { - source_stream = std::make_shared( + pipe.push_back({std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); - source_stream = std::make_shared(source_stream); + auto & output = pipe.back()->getOutputs().front(); + auto reverse_processor = std::make_shared(output.getHeader()); + connect(output, reverse_processor->getInputs().front()); + pipe.emplace_back(std::move(reverse_processor)); } - streams_per_thread.push_back(source_stream); + streams_per_thread.emplace_back(&pipe.back()->getOutputs().front()); } if (streams_per_thread.size() > 1) @@ -1013,16 +1041,27 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( sorting_info->direction, 1); for (auto & stream : streams_per_thread) - stream = std::make_shared(stream, sorting_key_prefix_expr); + { + pipe.emplace_back(std::make_shared(stream->getHeader(), sorting_key_prefix_expr)); + connect(*stream, pipe.back()->getInputs().front()); + stream = &pipe.back()->getOutputs().front(); + } - streams.push_back(std::make_shared( - streams_per_thread, sort_description, max_block_size)); + pipe.push_back(std::make_shared( + streams_per_thread.back()->getHeader(), streams_per_thread.size(), sort_description, max_block_size)); + + auto it = streams_per_thread.begin(); + for (auto & input : pipe.back()->getInputs()) + { + connect(**it, input); + ++it; + } } - else - streams.push_back(streams_per_thread.at(0)); + + pipes.push_back(std::move(pipe)); } - return streams; + return pipes; } @@ -1060,7 +1099,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (sum_marks > max_marks_to_use_cache) use_uncompressed_cache = false; - BlockInputStreams to_merge; + Pipes pipes; + std::vector to_merge; /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL @@ -1068,13 +1108,20 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( { RangesInDataPart & part = 
parts[part_index]; - BlockInputStreamPtr source_stream = std::make_shared( + auto source_processor = std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - to_merge.emplace_back(std::make_shared(source_stream, data.sorting_key_expr)); + auto & output = source_processor->getPort(); + auto expression_transform = std::make_shared(output.getHeader(), data.sorting_key_expr); + connect(output, expression_transform->getInputPort()); + + to_merge.emplace_back(&expression_transform->getOutputPort()); + + Processors pipe { std::move(source_processor), std::move(expression_transform) }; + pipes.emplace_back(std::move(pipe)); } Names sort_columns = data.sorting_key_columns; @@ -1086,42 +1133,74 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); + auto streams_to_merge = [&]() + { + size_t num_streams = to_merge.size(); + + BlockInputStreams streams; + streams.reserve(num_streams); + + for (size_t i = 0; i < num_streams; ++i) + streams.emplace_back(std::make_shared(pipes[i])); + + pipes.clear(); + return streams; + }; + + ProcessorPtr merged_processor; BlockInputStreamPtr merged; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - merged = std::make_shared(to_merge, sort_description, max_block_size); + merged_processor = std::make_shared(header, to_merge.size(), sort_description, max_block_size); break; case MergeTreeData::MergingParams::Collapsing: merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column); + streams_to_merge(), sort_description, data.merging_params.sign_column); break; case MergeTreeData::MergingParams::Summing: - merged = std::make_shared(to_merge, + merged = std::make_shared(streams_to_merge(), sort_description, data.merging_params.columns_to_sum, max_block_size); break; case MergeTreeData::MergingParams::Aggregating: - merged = std::make_shared(to_merge, sort_description, max_block_size); + merged = std::make_shared(streams_to_merge(), sort_description, max_block_size); break; case MergeTreeData::MergingParams::Replacing: /// TODO Make ReplacingFinalBlockInputStream - merged = std::make_shared(to_merge, + merged = std::make_shared(streams_to_merge(), sort_description, data.merging_params.version_column, max_block_size); break; case MergeTreeData::MergingParams::VersionedCollapsing: /// TODO Make VersionedCollapsingFinalBlockInputStream merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column, max_block_size); + streams_to_merge(), sort_description, data.merging_params.sign_column, max_block_size); break; case MergeTreeData::MergingParams::Graphite: throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); } - return {merged}; + if (merged) + return {{std::make_shared(merged)}}; + + auto it = to_merge.begin(); + for (auto & input : merged_processor->getInputs()) + { + connect(**it, input); + ++it; + } + + Processors result; + result.reserve(2 * pipes.size() + 1); + for (auto & pipe : pipes) + for (auto & processor : pipe) + result.emplace_back(std::move(processor)); + + result.emplace_back(merged_processor); + return {result}; } 
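A condensed sketch of the fan-in built above for FINAL, assuming `to_merge`
holds one upstream OutputPort * per part chain and `header` is the common
stream header (simplified from spreadMarkRangesAmongStreamsFinal; not
compilable on its own):

    auto merge = std::make_shared<MergingSortedTransform>(
        header, to_merge.size(), sort_description, max_block_size);

    /// Pair each collected upstream output with one of the merge's inputs.
    auto it = to_merge.begin();
    for (auto & input : merge->getInputs())
    {
        connect(**it, input);
        ++it;
    }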
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 9b46b663ab2..bcb80ff9a37 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -17,7 +17,7 @@ class KeyCondition; class MergeTreeDataSelectExecutor { public: - MergeTreeDataSelectExecutor(const MergeTreeData & data_); + explicit MergeTreeDataSelectExecutor(const MergeTreeData & data_); /** When reading, selects a set of parts that covers the desired range of the index. * max_blocks_number_to_read - if not nullptr, do not read all the parts whose right border is greater than max_block in partition. diff --git a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp index 6298c098220..d308667a67b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace ProfileEvents diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp similarity index 80% rename from dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 9b78517e742..ea250789dce 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include @@ -12,8 +12,27 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } +static Block replaceTypes(Block && header, const MergeTreeData::DataPartPtr & data_part) +{ + /// Types may be different during ALTER (when this stream is used to perform an ALTER). + /// NOTE: We may use similar code to implement non blocking ALTERs. 
+    for (const auto & name_type : data_part->columns)
+    {
+        if (header.has(name_type.name))
+        {
+            auto & elem = header.getByName(name_type.name);
+            if (!elem.type->equals(*name_type.type))
+            {
+                elem.type = name_type.type;
+                elem.column = elem.type->createColumn();
+            }
+        }
+    }
 
-MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
+    return std::move(header);
+}
+
+MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
     const MergeTreeData & storage_,
     const MergeTreeData::DataPartPtr & owned_data_part_,
     UInt64 max_block_size_rows_,
@@ -31,7 +50,9 @@ MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
     size_t part_index_in_query_,
     bool quiet)
     :
-    MergeTreeBaseSelectBlockInputStream{storage_, prewhere_info_, max_block_size_rows_,
+    MergeTreeBaseSelectProcessor{
+        replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_),
+        storage_, prewhere_info_, max_block_size_rows_,
         preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
         min_bytes_to_use_direct_io_, max_read_buffer_size_,
         use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_},
     required_columns{required_columns_},
@@ -55,28 +76,12 @@ MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
         : "")
     << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
 
-    addTotalRowsApprox(total_rows);
-    header = storage.getSampleBlockForColumns(required_columns);
+    /// TODO
+    /// addTotalRowsApprox(total_rows);
 
-    /// Types may be different during ALTER (when this stream is used to perform an ALTER).
-    /// NOTE: We may use similar code to implement non blocking ALTERs.
-    for (const auto & name_type : data_part->columns)
-    {
-        if (header.has(name_type.name))
-        {
-            auto & elem = header.getByName(name_type.name);
-            if (!elem.type->equals(*name_type.type))
-            {
-                elem.type = name_type.type;
-                elem.column = elem.type->createColumn();
-            }
-        }
-    }
-
-    executePrewhereActions(header, prewhere_info);
-    injectVirtualColumns(header);
-
-    ordered_names = getHeader().getNames();
+    ordered_names = getPort().getHeader().getNames();
+    /// Remove virtual columns.
+    ordered_names.resize(ordered_names.size() - virt_column_names.size());
 
     task_columns = getReadTaskColumns(storage, data_part, required_columns,
         prewhere_info, check_columns);
@@ -101,17 +106,10 @@ MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
         all_mark_ranges, min_bytes_to_use_direct_io, max_read_buffer_size);
 }
 
-
-Block MergeTreeReverseSelectBlockInputStream::getHeader() const
-{
-    return header;
-}
-
-
-bool MergeTreeReverseSelectBlockInputStream::getNewTask()
+bool MergeTreeReverseSelectProcessor::getNewTask()
 try
 {
-    if ((blocks.empty() && all_mark_ranges.empty()) || total_marks_count == 0)
+    if ((chunks.empty() && all_mark_ranges.empty()) || total_marks_count == 0)
     {
         finish();
         return false;
@@ -145,14 +143,14 @@ catch (...)
throw; } -Block MergeTreeReverseSelectBlockInputStream::readFromPart() +Chunk MergeTreeReverseSelectProcessor::readFromPart() { - Block res; + Chunk res; - if (!blocks.empty()) + if (!chunks.empty()) { - res = std::move(blocks.back()); - blocks.pop_back(); + res = std::move(chunks.back()); + chunks.pop_back(); return res; } @@ -161,20 +159,20 @@ Block MergeTreeReverseSelectBlockInputStream::readFromPart() while (!task->isFinished()) { - Block block = readFromPartImpl(); - blocks.push_back(std::move(block)); + Chunk chunk = readFromPartImpl(); + chunks.push_back(std::move(chunk)); } - if (blocks.empty()) + if (chunks.empty()) return {}; - res = std::move(blocks.back()); - blocks.pop_back(); + res = std::move(chunks.back()); + chunks.pop_back(); return res; } -void MergeTreeReverseSelectBlockInputStream::finish() +void MergeTreeReverseSelectProcessor::finish() { /** Close the files (before destroying the object). * When many sources are created, but simultaneously reading only a few of them, @@ -186,6 +184,6 @@ void MergeTreeReverseSelectBlockInputStream::finish() data_part.reset(); } -MergeTreeReverseSelectBlockInputStream::~MergeTreeReverseSelectBlockInputStream() = default; +MergeTreeReverseSelectProcessor::~MergeTreeReverseSelectProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h similarity index 82% rename from dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h index 40af5d5d92a..dcba0ca5e36 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include @@ -13,10 +13,10 @@ namespace DB /// Used to read data from single part with select query /// Cares about PREWHERE, virtual columns, indexes etc. /// To read data from multiple parts, Storage (MergeTree) creates multiple such objects. 
-class MergeTreeReverseSelectBlockInputStream : public MergeTreeBaseSelectBlockInputStream +class MergeTreeReverseSelectProcessor : public MergeTreeBaseSelectProcessor { public: - MergeTreeReverseSelectBlockInputStream( + MergeTreeReverseSelectProcessor( const MergeTreeData & storage, const MergeTreeData::DataPartPtr & owned_data_part, UInt64 max_block_size_rows, @@ -34,19 +34,17 @@ public: size_t part_index_in_query = 0, bool quiet = false); - ~MergeTreeReverseSelectBlockInputStream() override; + ~MergeTreeReverseSelectProcessor() override; String getName() const override { return "MergeTreeReverse"; } - Block getHeader() const override; - /// Closes readers and unlock part locks void finish(); protected: bool getNewTask() override; - Block readFromPart() override; + Chunk readFromPart() override; private: Block header; @@ -73,9 +71,9 @@ private: String path; - Blocks blocks; + Chunks chunks; - Logger * log = &Logger::get("MergeTreeReverseSelectBlockInputStream"); + Logger * log = &Logger::get("MergeTreeReverseSelectProcessor"); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp similarity index 85% rename from dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 7a6e6f197dd..2a28cb9f738 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include @@ -12,8 +12,27 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } +static Block replaceTypes(Block && header, const MergeTreeData::DataPartPtr & data_part) +{ + /// Types may be different during ALTER (when this stream is used to perform an ALTER). + /// NOTE: We may use similar code to implement non blocking ALTERs. 
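+    /// Concretely: for every column present both in the passed header and in the part,
+    /// prefer the type the part actually stores on disk and recreate an empty column of
+    /// that type, so the header stays consistent with what the readers will produce.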
+ for (const auto & name_type : data_part->columns) + { + if (header.has(name_type.name)) + { + auto & elem = header.getByName(name_type.name); + if (!elem.type->equals(*name_type.type)) + { + elem.type = name_type.type; + elem.column = elem.type->createColumn(); + } + } + } -MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream( + return std::move(header); +} + +MergeTreeSelectProcessor::MergeTreeSelectProcessor( const MergeTreeData & storage_, const MergeTreeData::DataPartPtr & owned_data_part_, UInt64 max_block_size_rows_, @@ -31,10 +50,12 @@ MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream( size_t part_index_in_query_, bool quiet) : - MergeTreeBaseSelectBlockInputStream{storage_, prewhere_info_, max_block_size_rows_, + MergeTreeBaseSelectProcessor{ + replaceTypes(storage_.getSampleBlockForColumns(required_columns), owned_data_part_), + storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_, max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_}, - required_columns{required_columns_}, + required_columns{std::move(required_columns_)}, data_part{owned_data_part_}, part_columns_lock(data_part->columns_lock), all_mark_ranges(mark_ranges_), @@ -56,39 +77,16 @@ MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream( : "") << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin)); - addTotalRowsApprox(total_rows); + /// TODO + /// addTotalRowsApprox(total_rows); - header = storage.getSampleBlockForColumns(required_columns); - - /// Types may be different during ALTER (when this stream is used to perform an ALTER). - /// NOTE: We may use similar code to implement non blocking ALTERs. - for (const auto & name_type : data_part->columns) - { - if (header.has(name_type.name)) - { - auto & elem = header.getByName(name_type.name); - if (!elem.type->equals(*name_type.type)) - { - elem.type = name_type.type; - elem.column = elem.type->createColumn(); - } - } - } - - executePrewhereActions(header, prewhere_info); - injectVirtualColumns(header); - - ordered_names = getHeader().getNames(); + ordered_names = getPort().getHeader().getNames(); + /// Remove virtual columns. + ordered_names.resize(ordered_names.size() - virt_column_names.size()); } -Block MergeTreeSelectBlockInputStream::getHeader() const -{ - return header; -} - - -bool MergeTreeSelectBlockInputStream::getNewTask() +bool MergeTreeSelectProcessor::getNewTask() try { /// Produce no more than one task @@ -149,7 +147,7 @@ catch (...) } -void MergeTreeSelectBlockInputStream::finish() +void MergeTreeSelectProcessor::finish() { /** Close the files (before destroying the object). 
* When many sources are created, but simultaneously reading only a few of them, @@ -162,7 +160,7 @@ void MergeTreeSelectBlockInputStream::finish() } -MergeTreeSelectBlockInputStream::~MergeTreeSelectBlockInputStream() = default; +MergeTreeSelectProcessor::~MergeTreeSelectProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h similarity index 85% rename from dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 0fc9830f5d0..0551d966481 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include @@ -13,10 +13,10 @@ namespace DB /// Used to read data from single part with select query /// Cares about PREWHERE, virtual columns, indexes etc. /// To read data from multiple parts, Storage (MergeTree) creates multiple such objects. -class MergeTreeSelectBlockInputStream : public MergeTreeBaseSelectBlockInputStream +class MergeTreeSelectProcessor : public MergeTreeBaseSelectProcessor { public: - MergeTreeSelectBlockInputStream( + MergeTreeSelectProcessor( const MergeTreeData & storage, const MergeTreeData::DataPartPtr & owned_data_part, UInt64 max_block_size_rows, @@ -34,12 +34,10 @@ public: size_t part_index_in_query = 0, bool quiet = false); - ~MergeTreeSelectBlockInputStream() override; + ~MergeTreeSelectProcessor() override; String getName() const override { return "MergeTree"; } - Block getHeader() const override; - /// Closes readers and unlock part locks void finish(); @@ -48,7 +46,6 @@ protected: bool getNewTask() override; private: - Block header; /// Used by Task Names required_columns; @@ -74,7 +71,7 @@ private: String path; bool is_first_task = true; - Logger * log = &Logger::get("MergeTreeSelectBlockInputStream"); + Logger * log = &Logger::get("MergeTreeSelectProcessor"); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp index 74cff479e5f..96e4d89ca84 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp @@ -91,23 +91,32 @@ try { size_t rows_to_read = data_part->index_granularity.getMarkRows(current_mark); bool continue_reading = (current_mark != 0); - size_t rows_readed = reader->readRows(current_mark, continue_reading, rows_to_read, res); - if (res) + auto & sample = reader->getColumns(); + Columns columns(sample.size()); + size_t rows_readed = reader->readRows(current_mark, continue_reading, rows_to_read, columns); + + if (rows_readed) { - res.checkNumberOfRows(); - current_row += rows_readed; current_mark += (rows_to_read == rows_readed); - bool should_reorder = false, should_evaluate_missing_defaults = false; - reader->fillMissingColumns(res, should_reorder, should_evaluate_missing_defaults, res.rows()); + bool should_evaluate_missing_defaults = false; + reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_readed); if (should_evaluate_missing_defaults) - reader->evaluateMissingDefaults(res); + reader->evaluateMissingDefaults(columns); - if (should_reorder) - reader->reorderColumns(res, header.getNames(), nullptr); + /// Reorder columns and fill result block. 
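+            /// `columns` is in the order of `sample` (the reader's column list), so walking
+            /// both in lockstep rebuilds the block with matching names and types.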
+ size_t num_columns = sample.size(); + auto it = sample.begin(); + for (size_t i = 0; i < num_columns; ++i) + { + res.insert({columns[i], it->type, it->name}); + ++it; + } + + res.checkNumberOfRows(); } } else diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp similarity index 84% rename from dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index cd6efa6b7d1..78122c53ac1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -1,13 +1,13 @@ #include #include -#include +#include namespace DB { -MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( +MergeTreeThreadSelectBlockInputProcessor::MergeTreeThreadSelectBlockInputProcessor( const size_t thread_, const MergeTreeReadPoolPtr & pool_, const size_t min_marks_to_read_, @@ -20,11 +20,11 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( const Settings & settings, const Names & virt_column_names_) : - MergeTreeBaseSelectBlockInputProcessor{pool->getHeader(), storage_, prewhere_info_, max_block_size_rows_, - preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, - thread{thread_}, - pool{pool_} + MergeTreeBaseSelectProcessor{pool_->getHeader(), storage_, prewhere_info_, max_block_size_rows_, + preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, + settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, + thread{thread_}, + pool{pool_} { /// round min_marks_to_read up to nearest multiple of block_size expressed in marks /// If granularity is adaptive it doesn't make sense @@ -42,7 +42,7 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( } /// Requests read task from MergeTreeReadPool and signals whether it got one -bool MergeTreeThreadSelectBlockInputStream::getNewTask() +bool MergeTreeThreadSelectBlockInputProcessor::getNewTask() { task = pool->getTask(min_marks_to_read, thread, ordered_names); @@ -105,6 +105,6 @@ bool MergeTreeThreadSelectBlockInputStream::getNewTask() } -MergeTreeThreadSelectBlockInputStream::~MergeTreeThreadSelectBlockInputStream() = default; +MergeTreeThreadSelectBlockInputProcessor::~MergeTreeThreadSelectBlockInputProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h similarity index 83% rename from dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h index 9603d21fb33..fa760e319cb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h @@ -1,5 +1,5 @@ #pragma once -#include +#include namespace DB @@ -11,10 +11,10 @@ class MergeTreeReadPool; /** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked * to perform. 
*/ -class MergeTreeThreadSelectBlockInputStream : public MergeTreeBaseSelectBlockInputProcessor +class MergeTreeThreadSelectBlockInputProcessor : public MergeTreeBaseSelectProcessor { public: - MergeTreeThreadSelectBlockInputStream( + MergeTreeThreadSelectBlockInputProcessor( const size_t thread_, const std::shared_ptr & pool_, const size_t min_marks_to_read_, @@ -29,7 +29,7 @@ public: String getName() const override { return "MergeTreeThread"; } - ~MergeTreeThreadSelectBlockInputStream() override; + ~MergeTreeThreadSelectBlockInputProcessor() override; protected: /// Requests read task from MergeTreeReadPool and signals whether it got one diff --git a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 0e15a5660a9..37a3b931fa8 100644 --- a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -28,8 +29,16 @@ public: size_t max_block_size, unsigned num_streams) override { - return MergeTreeDataSelectExecutor(part->storage).readFromParts( - {part}, column_names, query_info, context, max_block_size, num_streams); + auto pipes = MergeTreeDataSelectExecutor(part->storage).readFromParts( + {part}, column_names, query_info, context, max_block_size, num_streams); + + BlockInputStreams streams; + streams.reserve(pipes.size()); + + for (auto & pipe : pipes) + streams.emplace_back(std::make_shared(std::move(pipe))); + + return streams; } bool supportsIndexForIn() const override { return true; } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index e5821c1bcaf..4b07e03dcba 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2952,7 +2952,7 @@ StorageReplicatedMergeTree::~StorageReplicatedMergeTree() } -BlockInputStreams StorageReplicatedMergeTree::read( +Pipes StorageReplicatedMergeTree::readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index c5000944439..4378b9fc23b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -87,7 +87,7 @@ public: bool supportsReplication() const override { return true; } bool supportsDeduplication() const override { return true; } - BlockInputStreams read( + Pipes readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, From 54d32da5a180f860fa631705396291322e659d1d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 1 Oct 2019 21:30:23 +0300 Subject: [PATCH 013/222] Update TreeExecutor. --- dbms/src/Processors/Executors/TreeExecutor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index d7fc1b78ede..667c3a0e565 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -49,6 +49,8 @@ static void validateTree(const Processors & processors, IProcessor * root) throw Exception("Processor with name " + node->getName() + " was visited twice while traverse in TreeExecutor. 
" "Passed processors are not tree.", ErrorCodes::LOGICAL_ERROR); + is_visited[position] = true; + checkProcessorHasSingleOutput(node); auto & children = node->getInputs(); @@ -72,6 +74,7 @@ void TreeExecutor::init() validateTree(processors, root); port = std::make_unique(getHeader(), root); + connect(root->getOutputs().front(), *port); port->setNeeded(); } From e48f7faebc1776c1a73d2f7767e87f2e05f0d7cf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 2 Oct 2019 14:57:17 +0300 Subject: [PATCH 014/222] Fix MergeTreeRangeReader. Fix MergeTreeReader. Fix MergeTreeBaseSelectProcessor. Better exception message for TreeExecutor. Added header_without_virtual_columns to MergeTreeBaseSelectProcessor. Fix MergeTreeReverseSelectProcessor. Fix MergeTreeDataSelectExecutor. --- .../src/Processors/Executors/TreeExecutor.cpp | 15 ++++++- .../MergeTreeBaseSelectProcessor.cpp | 15 ++++--- .../MergeTree/MergeTreeBaseSelectProcessor.h | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 1 + .../MergeTree/MergeTreeRangeReader.cpp | 45 +++++++++++++------ .../Storages/MergeTree/MergeTreeReader.cpp | 11 +++-- dbms/src/Storages/MergeTree/MergeTreeReader.h | 2 +- .../MergeTreeReverseSelectProcessor.cpp | 6 +-- .../MergeTree/MergeTreeSelectProcessor.cpp | 6 +-- .../MergeTreeSequentialBlockInputStream.cpp | 2 +- 10 files changed, 68 insertions(+), 36 deletions(-) diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index 667c3a0e565..593d455e672 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -83,11 +83,24 @@ void TreeExecutor::execute() std::stack stack; stack.push(root); + auto prepare_processor = [](IProcessor * processor) + { + try + { + return processor->prepare(); + } + catch (Exception & exception) + { + exception.addMessage(" While executing processor " + processor->getName()); + throw; + } + }; + while (!stack.empty()) { IProcessor * node = stack.top(); - auto status = node->prepare(); + auto status = prepare_processor(node); switch (status) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 0f03a2c8f57..77f33dce01d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -44,6 +44,11 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( save_marks_in_cache(save_marks_in_cache_), virt_column_names(virt_column_names_) { + header_without_virtual_columns = getPort().getHeader(); + + for (auto it = virt_column_names.rbegin(); it != virt_column_names.rend(); ++it) + if (header_without_virtual_columns.has(*it)) + header_without_virtual_columns.erase(*it); } @@ -161,7 +166,7 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() if (read_result.num_rows == 0) read_result.columns.clear(); - auto & sample_block = getPort().getHeader(); + auto & sample_block = task->range_reader.getSampleBlock(); if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) throw Exception("Inconsistent number of columns got from MergeTreeRangeReader. 
" "Have " + toString(sample_block.columns()) + " in sample block " @@ -184,15 +189,13 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() if (read_result.num_rows == 0) return {}; - auto & header = getPort().getHeader(); Columns ordered_columns; - size_t num_virtual_columns = virt_column_names.size(); - ordered_columns.reserve(header.columns() - num_virtual_columns); + ordered_columns.reserve(header_without_virtual_columns.columns()); /// Reorder columns. TODO: maybe skip for default case. - for (size_t ps = 0; ps + num_virtual_columns < header.columns(); ++ps) + for (size_t ps = 0; ps < header_without_virtual_columns.columns(); ++ps) { - auto pos_in_sample_block = sample_block.getPositionByName(header.getByPosition(ps).name); + auto pos_in_sample_block = sample_block.getPositionByName(header_without_virtual_columns.getByPosition(ps).name); ordered_columns.emplace_back(std::move(read_result.columns[pos_in_sample_block])); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 0197d481f13..db369a5f267 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -70,6 +70,7 @@ protected: bool save_marks_in_cache; Names virt_column_names; + Block header_without_virtual_columns; std::unique_ptr task; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index af410c6ed14..7df4178f58f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -658,6 +658,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( auto & output = pipe.back()->getOutputs().front(); pipe.emplace_back(std::make_shared( output.getHeader(), query_info.prewhere_info->remove_columns_actions)); + connect(output, pipe.back()->getInputs().front()); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 8cac9fcfad8..ec554d72339 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -530,15 +530,13 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); } + if (!columns.empty() && should_evaluate_missing_defaults) + merge_tree_reader->evaluateMissingDefaults( + prev_reader->getSampleBlock().cloneWithColumns(read_result.columns), columns); + read_result.columns.reserve(read_result.columns.size() + columns.size()); for (auto & column : columns) read_result.columns.emplace_back(std::move(column)); - - if (!read_result.columns.empty()) - { - if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.columns); - } } else { @@ -552,7 +550,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result.num_rows); if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.columns); + merge_tree_reader->evaluateMissingDefaults({}, read_result.columns); } else read_result.columns.clear(); @@ -691,8 +689,18 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r { /// Restore block from columns list. 
Block block; - auto name_and_type = header.begin(); - for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) + size_t pos = 0; + + if (prev_reader) + { + for (auto & col : prev_reader->getSampleBlock()) + { + block.insert({result.columns[pos], col.type, col.name}); + ++pos; + } + } + + for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); if (alias_actions) @@ -703,7 +711,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r prewhere_column_pos = block.getPositionByName(*prewhere_column_name); result.columns.clear(); - result.columns.resize(block.columns()); + result.columns.reserve(block.columns()); for (auto & col : block) result.columns.emplace_back(std::move(col.column)); @@ -761,10 +769,21 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r { filterColumns(result.columns, *filter_description.data); - if (result.columns.empty()) + /// Get num rows after filtration. + bool has_column = false; + + for (auto & column : result.columns) + { + if (column) + { + has_column = true; + result.num_rows = column->size(); + break; + } + } + + if (!has_column) result.num_rows = getNumBytesInFilter(); - else - result.num_rows = result.columns[0]->size(); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index cab963d0f66..29d1dac7587 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -324,7 +324,7 @@ void MergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_ev if (res_columns[i] && arrayHasNoElementsRead(*res_columns[i])) res_columns[i] = nullptr; - if (res_columns[i]) + if (res_columns[i] == nullptr) { if (storage.getColumns().hasDefault(name)) { @@ -362,7 +362,7 @@ void MergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_ev } } -void MergeTreeReader::evaluateMissingDefaults(Columns & res_columns) +void MergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns & res_columns) { try { @@ -375,22 +375,21 @@ void MergeTreeReader::evaluateMissingDefaults(Columns & res_columns) /// Convert columns list to block. /// TODO: rewrite with columns interface. It wll be possible after changes in ExpressionActions. - Block block; auto name_and_type = columns.begin(); for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) { if (res_columns[pos] == nullptr) continue; - block.insert({res_columns[pos], name_and_type->type, name_and_type->name}); + additional_columns.insert({res_columns[pos], name_and_type->type, name_and_type->name}); } - DB::evaluateMissingDefaults(block, columns, storage.getColumns().getDefaults(), storage.global_context); + DB::evaluateMissingDefaults(additional_columns, columns, storage.getColumns().getDefaults(), storage.global_context); /// Move columns from block. 
name_and_type = columns.begin(); for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) - res_columns[pos] = std::move(block.getByName(name_and_type->name).column); + res_columns[pos] = std::move(additional_columns.getByName(name_and_type->name).column); } catch (Exception & e) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h index 367f1bbb530..a690e56155a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h @@ -42,7 +42,7 @@ public: /// num_rows is needed in case if all res_columns are nullptr. void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows); /// Evaluate defaulted columns if necessary. - void evaluateMissingDefaults(Columns & res_columns); + void evaluateMissingDefaults(Block additional_columns, Columns & res_columns); const NamesAndTypesList & getColumns() const { return columns; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index ea250789dce..ff8d599135b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -51,7 +51,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - replaceTypes(storage_.getSampleBlockForColumns(required_columns), owned_data_part_), + replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_), storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_, max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_}, @@ -79,9 +79,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( /// TODO /// addTotalRowsApprox(total_rows); - ordered_names = getPort().getHeader().getNames(); - /// Remove virtual columns. - ordered_names.resize(ordered_names.size() - virt_column_names.size()); + ordered_names = header_without_virtual_columns.getNames(); task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns); diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 2a28cb9f738..04954d6ff82 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -51,7 +51,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - replaceTypes(storage_.getSampleBlockForColumns(required_columns), owned_data_part_), + replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_), storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_, max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_}, @@ -80,9 +80,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( /// TODO /// addTotalRowsApprox(total_rows); - ordered_names = getPort().getHeader().getNames(); - /// Remove virtual columns. 
- ordered_names.resize(ordered_names.size() - virt_column_names.size()); + ordered_names = header_without_virtual_columns.getNames(); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp index 96e4d89ca84..eeeb07f1c26 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp @@ -105,7 +105,7 @@ try reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_readed); if (should_evaluate_missing_defaults) - reader->evaluateMissingDefaults(columns); + reader->evaluateMissingDefaults({}, columns); /// Reorder columns and fill result block. size_t num_columns = sample.size(); From 627d48c19a0fc7e7149a2ec82ea54fb650a37ed5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 3 Oct 2019 21:27:11 +0300 Subject: [PATCH 015/222] Added ExecutionSpeedLimits. --- dbms/src/DataStreams/ExecutionSpeedLimits.cpp | 86 +++++++++++++++ dbms/src/DataStreams/ExecutionSpeedLimits.h | 27 +++++ dbms/src/DataStreams/IBlockInputStream.cpp | 102 ++---------------- dbms/src/DataStreams/IBlockInputStream.h | 12 +-- dbms/src/DataStreams/SizeLimits.cpp | 11 +- dbms/src/DataStreams/SizeLimits.h | 1 + .../Interpreters/InterpreterSelectQuery.cpp | 12 +-- .../Transforms/LimitsCheckingTransform.cpp | 6 +- .../Storages/Kafka/KafkaBlockInputStream.cpp | 2 +- dbms/src/Storages/Kafka/StorageKafka.cpp | 2 +- 10 files changed, 144 insertions(+), 117 deletions(-) create mode 100644 dbms/src/DataStreams/ExecutionSpeedLimits.cpp create mode 100644 dbms/src/DataStreams/ExecutionSpeedLimits.h diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.cpp b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp new file mode 100644 index 00000000000..8886ca4b2b8 --- /dev/null +++ b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp @@ -0,0 +1,86 @@ +#include + +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event ThrottlerSleepMicroseconds; +} + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_SLOW; +} + +static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_in_seconds, UInt64 total_elapsed_microseconds) +{ + /// How much time to wait for the average speed to become `max_speed_in_seconds`. + UInt64 desired_microseconds = total_progress_size * 1000000 / max_speed_in_seconds; + + if (desired_microseconds > total_elapsed_microseconds) + { + UInt64 sleep_microseconds = desired_microseconds - total_elapsed_microseconds; + + /// Never sleep more than one second (it should be enough to limit speed for a reasonable amount, and otherwise it's too easy to make query hang). 
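+        /// Worked example: total_progress_size = 1000000 rows at max_speed_in_seconds = 100000
+        /// rows/sec gives desired_microseconds = 10 sec; if only 2 sec have elapsed, the 8 sec
+        /// of pending sleep is clamped to 1 sec per call, so the limit converges over repeated calls.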
+        sleep_microseconds = std::min(UInt64(1000000), sleep_microseconds);
+
+        sleepForMicroseconds(sleep_microseconds);
+
+        ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_microseconds);
+    }
+}
+
+void ExecutionSpeedLimits::throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds)
+{
+    if ((min_execution_speed || max_execution_speed || min_execution_speed_bytes ||
+         max_execution_speed_bytes || (total_rows && timeout_before_checking_execution_speed != 0)) &&
+        (static_cast<Int64>(total_elapsed_microseconds) > timeout_before_checking_execution_speed.totalMicroseconds()))
+    {
+        /// Do not count sleeps in throttlers
+        UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds];
+
+        double elapsed_seconds = 0;
+        if (total_elapsed_microseconds > throttler_sleep_microseconds)
+            elapsed_seconds = static_cast<double>(total_elapsed_microseconds - throttler_sleep_microseconds) / 1000000.0;
+
+        if (elapsed_seconds > 0)
+        {
+            if (min_execution_speed && read_rows / elapsed_seconds < min_execution_speed)
+                throw Exception("Query is executing too slow: " + toString(read_rows / elapsed_seconds)
+                                + " rows/sec., minimum: " + toString(min_execution_speed),
+                                ErrorCodes::TOO_SLOW);
+
+            if (min_execution_speed_bytes && read_bytes / elapsed_seconds < min_execution_speed_bytes)
+                throw Exception("Query is executing too slow: " + toString(read_bytes / elapsed_seconds)
+                                + " bytes/sec., minimum: " + toString(min_execution_speed_bytes),
+                                ErrorCodes::TOO_SLOW);
+
+            /// If the predicted execution time is longer than `max_execution_time`.
+            if (max_execution_time != 0 && total_rows && read_rows)
+            {
+                double estimated_execution_time_seconds = elapsed_seconds * (static_cast<double>(total_rows) / read_rows);
+
+                if (estimated_execution_time_seconds > max_execution_time.totalSeconds())
+                    throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)"
+                                    + " is too long. Maximum: " + toString(max_execution_time.totalSeconds())
+                                    + ". Estimated rows to process: " + toString(total_rows),
+                                    ErrorCodes::TOO_SLOW);
+            }
+
+            if (max_execution_speed && read_rows / elapsed_seconds >= max_execution_speed)
+                limitProgressingSpeed(read_rows, max_execution_speed, total_elapsed_microseconds);
+
+            if (max_execution_speed_bytes && read_bytes / elapsed_seconds >= max_execution_speed_bytes)
+                limitProgressingSpeed(read_bytes, max_execution_speed_bytes, total_elapsed_microseconds);
+        }
+    }
+}
+
+}
diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.h b/dbms/src/DataStreams/ExecutionSpeedLimits.h
new file mode 100644
index 00000000000..67627cb36bf
--- /dev/null
+++ b/dbms/src/DataStreams/ExecutionSpeedLimits.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <Poco/Timespan.h>
+#include <Core/Types.h>
+
+namespace DB
+{
+
+/// Limits for query execution speed.
+/// In rows per second.
+class ExecutionSpeedLimits
+{
+public:
+    size_t min_execution_speed = 0;
+    size_t max_execution_speed = 0;
+    size_t min_execution_speed_bytes = 0;
+    size_t max_execution_speed_bytes = 0;
+
+    Poco::Timespan max_execution_time = 0;
+    /// Verify that the speed is not too low after the specified time has elapsed.
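+    /// Before this moment no speed checks are performed at all, which keeps short
+    /// queries from being throttled or aborted spuriously.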
+ Poco::Timespan timeout_before_checking_execution_speed = 0; + + void throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds); +}; + +} + diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp index a2c3fb2247c..92bdc559a95 100644 --- a/dbms/src/DataStreams/IBlockInputStream.cpp +++ b/dbms/src/DataStreams/IBlockInputStream.cpp @@ -214,11 +214,11 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co bool IBlockInputStream::checkTimeLimit() { - if (limits.max_execution_time != 0 - && info.total_stopwatch.elapsed() > static_cast(limits.max_execution_time.totalMicroseconds()) * 1000) + if (limits.speed_limit.max_execution_time != 0 + && info.total_stopwatch.elapsed() > static_cast(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000) return handleOverflowMode(limits.timeout_overflow_mode, "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds()) - + " seconds, maximum: " + toString(limits.max_execution_time.totalMicroseconds() / 1000000.0), + + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0), ErrorCodes::TIMEOUT_EXCEEDED); return true; @@ -247,24 +247,6 @@ void IBlockInputStream::checkQuota(Block & block) } } -static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_in_seconds, UInt64 total_elapsed_microseconds) -{ - /// How much time to wait for the average speed to become `max_speed_in_seconds`. - UInt64 desired_microseconds = total_progress_size * 1000000 / max_speed_in_seconds; - - if (desired_microseconds > total_elapsed_microseconds) - { - UInt64 sleep_microseconds = desired_microseconds - total_elapsed_microseconds; - - /// Never sleep more than one second (it should be enough to limit speed for a reasonable amount, and otherwise it's too easy to make query hang). - sleep_microseconds = std::min(UInt64(1000000), sleep_microseconds); - - sleepForMicroseconds(sleep_microseconds); - - ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_microseconds); - } -} - void IBlockInputStream::progressImpl(const Progress & value) { @@ -284,40 +266,11 @@ void IBlockInputStream::progressImpl(const Progress & value) /** Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read. * NOTE: Maybe it makes sense to have them checked directly in ProcessList? */ - - if (limits.mode == LIMITS_TOTAL - && ((limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows) - || (limits.size_limits.max_bytes && progress.read_bytes > limits.size_limits.max_bytes))) + if (limits.mode == LIMITS_TOTAL) { - switch (limits.size_limits.overflow_mode) - { - case OverflowMode::THROW: - { - if (limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows) - throw Exception("Limit for rows to read exceeded: " + toString(total_rows_estimate) - + " rows read (or to read), maximum: " + toString(limits.size_limits.max_rows), - ErrorCodes::TOO_MANY_ROWS); - else - throw Exception("Limit for (uncompressed) bytes to read exceeded: " + toString(progress.read_bytes) - + " bytes read, maximum: " + toString(limits.size_limits.max_bytes), - ErrorCodes::TOO_MANY_BYTES); - } - - case OverflowMode::BREAK: - { - /// For `break`, we will stop only if so many rows were actually read, and not just supposed to be read. 
- if ((limits.size_limits.max_rows && progress.read_rows > limits.size_limits.max_rows) - || (limits.size_limits.max_bytes && progress.read_bytes > limits.size_limits.max_bytes)) - { - cancel(false); - } - - break; - } - - default: - throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR); - } + if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read", + ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES)) + cancel(false); } size_t total_rows = progress.total_rows_to_read; @@ -331,46 +284,7 @@ void IBlockInputStream::progressImpl(const Progress & value) last_profile_events_update_time = total_elapsed_microseconds; } - if ((limits.min_execution_speed || limits.max_execution_speed || limits.min_execution_speed_bytes || - limits.max_execution_speed_bytes || (total_rows && limits.timeout_before_checking_execution_speed != 0)) && - (static_cast(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds())) - { - /// Do not count sleeps in throttlers - UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds]; - double elapsed_seconds = (throttler_sleep_microseconds > total_elapsed_microseconds) - ? 0.0 : (total_elapsed_microseconds - throttler_sleep_microseconds) / 1000000.0; - - if (elapsed_seconds > 0) - { - if (limits.min_execution_speed && progress.read_rows / elapsed_seconds < limits.min_execution_speed) - throw Exception("Query is executing too slow: " + toString(progress.read_rows / elapsed_seconds) - + " rows/sec., minimum: " + toString(limits.min_execution_speed), - ErrorCodes::TOO_SLOW); - - if (limits.min_execution_speed_bytes && progress.read_bytes / elapsed_seconds < limits.min_execution_speed_bytes) - throw Exception("Query is executing too slow: " + toString(progress.read_bytes / elapsed_seconds) - + " bytes/sec., minimum: " + toString(limits.min_execution_speed_bytes), - ErrorCodes::TOO_SLOW); - - /// If the predicted execution time is longer than `max_execution_time`. - if (limits.max_execution_time != 0 && total_rows && progress.read_rows) - { - double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows) / progress.read_rows); - - if (estimated_execution_time_seconds > limits.max_execution_time.totalSeconds()) - throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)" - + " is too long. Maximum: " + toString(limits.max_execution_time.totalSeconds()) - + ". 
Estimated rows to process: " + toString(total_rows), - ErrorCodes::TOO_SLOW); - } - - if (limits.max_execution_speed && progress.read_rows / elapsed_seconds >= limits.max_execution_speed) - limitProgressingSpeed(progress.read_rows, limits.max_execution_speed, total_elapsed_microseconds); - - if (limits.max_execution_speed_bytes && progress.read_bytes / elapsed_seconds >= limits.max_execution_speed_bytes) - limitProgressingSpeed(progress.read_bytes, limits.max_execution_speed_bytes, total_elapsed_microseconds); - } - } + limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); if (quota != nullptr && limits.mode == LIMITS_TOTAL) { diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index f33c4534a3f..7b40292690e 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -202,16 +203,9 @@ public: SizeLimits size_limits; - Poco::Timespan max_execution_time = 0; - OverflowMode timeout_overflow_mode = OverflowMode::THROW; + ExecutionSpeedLimits speed_limit; - /// in rows per second - size_t min_execution_speed = 0; - size_t max_execution_speed = 0; - size_t min_execution_speed_bytes = 0; - size_t max_execution_speed_bytes = 0; - /// Verify that the speed is not too low after the specified time has elapsed. - Poco::Timespan timeout_before_checking_execution_speed = 0; + OverflowMode timeout_overflow_mode = OverflowMode::THROW; }; /** Set limitations that checked on each block. */ diff --git a/dbms/src/DataStreams/SizeLimits.cpp b/dbms/src/DataStreams/SizeLimits.cpp index 63164552120..be0308b6edd 100644 --- a/dbms/src/DataStreams/SizeLimits.cpp +++ b/dbms/src/DataStreams/SizeLimits.cpp @@ -7,13 +7,13 @@ namespace DB { -bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const +bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int too_many_rows_exception_code, int too_many_bytes_exception_code) const { if (max_rows && rows > max_rows) { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max rows: " + formatReadableQuantity(max_rows) - + ", current rows: " + formatReadableQuantity(rows), exception_code); + + ", current rows: " + formatReadableQuantity(rows), too_many_rows_exception_code); else return false; } @@ -22,7 +22,7 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max bytes: " + formatReadableSizeWithBinarySuffix(max_bytes) - + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), exception_code); + + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), too_many_bytes_exception_code); else return false; } @@ -30,4 +30,9 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti return true; } +bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const +{ + return check(rows, bytes, what, exception_code, exception_code); +} + } diff --git a/dbms/src/DataStreams/SizeLimits.h b/dbms/src/DataStreams/SizeLimits.h index 41238087613..1bd673b1602 100644 --- a/dbms/src/DataStreams/SizeLimits.h +++ b/dbms/src/DataStreams/SizeLimits.h @@ -31,6 +31,7 @@ struct SizeLimits : max_rows(max_rows_), max_bytes(max_bytes_), overflow_mode(overflow_mode_) {} /// 
Check limits. If exceeded, return false or throw an exception, depending on overflow_mode. + bool check(UInt64 rows, UInt64 bytes, const char * what, int too_many_rows_exception_code, int too_many_bytes_exception_code) const; bool check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const; }; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 39a1976d2d4..68d91bdba30 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1603,7 +1603,7 @@ void InterpreterSelectQuery::executeFetchColumns( IBlockInputStream::LocalLimits limits; limits.mode = IBlockInputStream::LIMITS_TOTAL; limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); - limits.max_execution_time = settings.max_execution_time; + limits.speed_limit.max_execution_time = settings.max_execution_time; limits.timeout_overflow_mode = settings.timeout_overflow_mode; /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, @@ -1615,11 +1615,11 @@ void InterpreterSelectQuery::executeFetchColumns( */ if (options.to_stage == QueryProcessingStage::Complete) { - limits.min_execution_speed = settings.min_execution_speed; - limits.max_execution_speed = settings.max_execution_speed; - limits.min_execution_speed_bytes = settings.min_execution_speed_bytes; - limits.max_execution_speed_bytes = settings.max_execution_speed_bytes; - limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + limits.speed_limit.min_execution_speed = settings.min_execution_speed; + limits.speed_limit.max_execution_speed = settings.max_execution_speed; + limits.speed_limit.min_execution_speed_bytes = settings.min_execution_speed_bytes; + limits.speed_limit.max_execution_speed_bytes = settings.max_execution_speed_bytes; + limits.speed_limit.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; } QuotaForIntervals & quota = context.getQuota(); diff --git a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp index 5eee08efcfc..094181d9cdb 100644 --- a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp +++ b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp @@ -80,11 +80,11 @@ void LimitsCheckingTransform::transform(Chunk & chunk) bool LimitsCheckingTransform::checkTimeLimit() { - if (limits.max_execution_time != 0 - && info.total_stopwatch.elapsed() > static_cast(limits.max_execution_time.totalMicroseconds()) * 1000) + if (limits.speed_limit.max_execution_time != 0 + && info.total_stopwatch.elapsed() > static_cast(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000) return handleOverflowMode(limits.timeout_overflow_mode, "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds()) - + " seconds, maximum: " + toString(limits.max_execution_time.totalMicroseconds() / 1000000.0), + + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0), ErrorCodes::TIMEOUT_EXCEEDED); return true; diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 29adb061e29..3bea5bc53a9 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -51,7 +51,7 @@ void 
KafkaBlockInputStream::readPrefixImpl() const auto & limits_ = getLimits(); const size_t poll_timeout = buffer->pollTimeout(); - size_t rows_portion_size = poll_timeout ? std::min(max_block_size, limits_.max_execution_time.totalMilliseconds() / poll_timeout) : max_block_size; + size_t rows_portion_size = poll_timeout ? std::min(max_block_size, limits_.speed_limit.max_execution_time.totalMilliseconds() / poll_timeout) : max_block_size; rows_portion_size = std::max(rows_portion_size, 1ul); auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index ed067993a18..ae9b2527d9a 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -387,7 +387,7 @@ bool StorageKafka::streamToViews() // Limit read batch to maximum block size to allow DDL IBlockInputStream::LocalLimits limits; - limits.max_execution_time = settings.stream_flush_interval_ms; + limits.speed_limit.max_execution_time = settings.stream_flush_interval_ms; limits.timeout_overflow_mode = OverflowMode::BREAK; stream->setLimits(limits); } From 23069ca6d0ca5cebfb42e1bd3f0fca37853f0c92 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 4 Oct 2019 18:40:05 +0300 Subject: [PATCH 016/222] Progress for MergeTreeSelectProcessor. --- dbms/src/DataStreams/IBlockInputStream.h | 10 +-- dbms/src/Interpreters/ProcessList.cpp | 2 + .../src/Processors/Executors/TreeExecutor.cpp | 43 ++++++++++- dbms/src/Processors/Executors/TreeExecutor.h | 13 ++++ .../Sources/SourceFromInputStream.cpp | 2 +- .../Sources/SourceFromInputStream.h | 11 ++- .../Processors/Sources/SourceWithProgress.cpp | 69 +++++++++++++++++ .../Processors/Sources/SourceWithProgress.h | 75 +++++++++++++++++++ .../MergeTreeBaseSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeBaseSelectProcessor.h | 8 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 9 ++- .../MergeTreeReverseSelectProcessor.cpp | 10 +-- .../MergeTreeReverseSelectProcessor.h | 2 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 9 +-- .../MergeTree/MergeTreeSelectProcessor.h | 2 +- 15 files changed, 234 insertions(+), 37 deletions(-) create mode 100644 dbms/src/Processors/Sources/SourceWithProgress.cpp create mode 100644 dbms/src/Processors/Sources/SourceWithProgress.h diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 7b40292690e..3bfdb614fbc 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -139,7 +139,7 @@ public: * The function takes the number of rows in the last block, the number of bytes in the last block. * Note that the callback can be called from different threads. */ - void setProgressCallback(const ProgressCallback & callback); + virtual void setProgressCallback(const ProgressCallback & callback); /** In this method: @@ -164,11 +164,11 @@ public: * Based on this information, the quota and some restrictions will be checked. * This information will also be available in the SHOW PROCESSLIST request. */ - void setProcessListElement(QueryStatus * elem); + virtual void setProcessListElement(QueryStatus * elem); /** Set the approximate total number of rows to read. */ - void addTotalRowsApprox(size_t value) { total_rows_approx += value; } + virtual void addTotalRowsApprox(size_t value) { total_rows_approx += value; } /** Ask to abort the receipt of data as soon as possible. 
@@ -209,7 +209,7 @@ public: }; /** Set limitations that checked on each block. */ - void setLimits(const LocalLimits & limits_) + virtual void setLimits(const LocalLimits & limits_) { limits = limits_; } @@ -222,7 +222,7 @@ public: /** Set the quota. If you set a quota on the amount of raw data, * then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits. */ - void setQuota(QuotaForIntervals & quota_) + virtual void setQuota(QuotaForIntervals & quota_) { quota = "a_; } diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 100ecc00dc1..3f7eca86930 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -28,6 +28,8 @@ namespace ErrorCodes extern const int TOO_MANY_SIMULTANEOUS_QUERIES; extern const int QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING; extern const int LOGICAL_ERROR; + extern const int TOO_MANY_ROWS; + extern const int TOO_MANY_BYTES; } diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index 593d455e672..469b1c36eb2 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -13,7 +14,7 @@ static void checkProcessorHasSingleOutput(IProcessor * processor) ErrorCodes::LOGICAL_ERROR); } -static void validateTree(const Processors & processors, IProcessor * root) +static void validateTree(const Processors & processors, IProcessor * root, std::vector & sources) { std::unordered_map index; @@ -56,6 +57,13 @@ static void validateTree(const Processors & processors, IProcessor * root) auto & children = node->getInputs(); for (auto & child : children) stack.push(&child.getOutputPort().getProcessor()); + + /// Fill sources array. + if (children.empty()) + { + if (auto * source = dynamic_cast(node)) + sources.push_back(source); + } } for (size_t i = 0; i < is_visited.size(); ++i) @@ -71,7 +79,7 @@ void TreeExecutor::init() root = processors.back().get(); - validateTree(processors, root); + validateTree(processors, root, sources_with_progress); port = std::make_unique(getHeader(), root); connect(root->getOutputs().front(), *port); @@ -170,4 +178,35 @@ Block TreeExecutor::readImpl() } } +void TreeExecutor::setProgressCallback(const ProgressCallback & callback) +{ + for (auto & source : sources_with_progress) + source->setProgressCallback(callback); +} + +void TreeExecutor::setProcessListElement(QueryStatus * elem) +{ + for (auto & source : sources_with_progress) + source->setProcessListElement(elem); +} + +void TreeExecutor::setLimits(const IBlockInputStream::LocalLimits & limits_) +{ + for (auto & source : sources_with_progress) + source->setLimits(limits_); +} + +void TreeExecutor::setQuota(QuotaForIntervals & quota_) +{ + for (auto & source : sources_with_progress) + source->setQuota(quota_); +} + +void TreeExecutor::addTotalRowsApprox(size_t value) +{ + /// Add only for one source. 
+    if (!sources_with_progress.empty())
+        sources_with_progress.front()->addTotalRowsApprox(value);
+}
+
 }
diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h
index 0aad5b3024a..4af989240c2 100644
--- a/dbms/src/Processors/Executors/TreeExecutor.h
+++ b/dbms/src/Processors/Executors/TreeExecutor.h
@@ -5,6 +5,8 @@
 namespace DB
 {
 
+class ISourceWithProgress;
+
 class TreeExecutor : public IBlockInputStream
 {
 public:
@@ -13,6 +15,14 @@ public:
     String getName() const override { return root->getName(); }
     Block getHeader() const override { return root->getOutputs().front().getHeader(); }
 
+    /// These methods do not affect TreeExecutor as an IBlockInputStream itself.
+    /// They are just passed on to all SourceWithProgress processors.
+    void setProgressCallback(const ProgressCallback & callback) final;
+    void setProcessListElement(QueryStatus * elem) final;
+    void setLimits(const LocalLimits & limits_) final;
+    void setQuota(QuotaForIntervals & quota_) final;
+    void addTotalRowsApprox(size_t value) final;
+
 protected:
     Block readImpl() override;
 
@@ -21,6 +31,9 @@ private:
     IProcessor * root = nullptr;
     std::unique_ptr<InputPort> port;
 
+    /// Remember sources that support progress.
+    std::vector<ISourceWithProgress *> sources_with_progress;
+
     void init();
     void execute();
 };
diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.cpp b/dbms/src/Processors/Sources/SourceFromInputStream.cpp
index b82130f5ebb..691a9785942 100644
--- a/dbms/src/Processors/Sources/SourceFromInputStream.cpp
+++ b/dbms/src/Processors/Sources/SourceFromInputStream.cpp
@@ -7,7 +7,7 @@ namespace DB
 {
 
 SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_)
-    : ISource(stream_->getHeader())
+    : ISourceWithProgress(stream_->getHeader())
     , force_add_aggregating_info(force_add_aggregating_info_)
     , stream(std::move(stream_))
 {
diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.h b/dbms/src/Processors/Sources/SourceFromInputStream.h
index 0e6c698f260..6f8a7fcd2d1 100644
--- a/dbms/src/Processors/Sources/SourceFromInputStream.h
+++ b/dbms/src/Processors/Sources/SourceFromInputStream.h
@@ -1,5 +1,5 @@
 #pragma once
-#include <Processors/ISource.h>
+#include <Processors/Sources/SourceWithProgress.h>
 
 namespace DB
 {
@@ -7,7 +7,7 @@ namespace DB
 class IBlockInputStream;
 using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
 
-class SourceFromInputStream : public ISource
+class SourceFromInputStream : public ISourceWithProgress
 {
 public:
     explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false);
@@ -22,6 +22,13 @@ public:
 
     void addTotalsPort();
 
+    /// Implementation for methods from ISourceWithProgress.
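+    /// All of them simply delegate to the wrapped IBlockInputStream.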
+    void setLimits(const LocalLimits & limits_) final { stream->setLimits(limits_); }
+    void setQuota(QuotaForIntervals & quota_) final { stream->setQuota(quota_); }
+    void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); }
+    void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); }
+    void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); }
+
 private:
     bool has_aggregate_functions = false;
     bool force_add_aggregating_info;
diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp
new file mode 100644
index 00000000000..d3487faae3a
--- /dev/null
+++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp
@@ -0,0 +1,69 @@
+#include <Processors/Sources/SourceWithProgress.h>
+
+#include <Interpreters/Quota.h>
+#include <Interpreters/ProcessList.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int TOO_MANY_ROWS;
+    extern const int TOO_MANY_BYTES;
+}
+
+void SourceWithProgress::progress(const Progress & value)
+{
+    if (total_rows_approx != 0 && process_list_elem)
+    {
+        process_list_elem->updateProgressIn({0, 0, total_rows_approx});
+        total_rows_approx = 0;
+    }
+
+    if (progress_callback)
+        progress_callback(value);
+
+    if (process_list_elem)
+    {
+        if (!process_list_elem->updateProgressIn(value))
+            cancel();
+
+        /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers.
+
+        ProgressValues progress = process_list_elem->getProgressIn();
+        size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read);
+
+        /// Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read.
+        /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
+        if (limits.mode == LimitsMode::LIMITS_TOTAL)
+        {
+            if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read",
+                                          ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
+                cancel();
+        }
+
+        size_t total_rows = progress.total_rows_to_read;
+
+        constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
+        UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
+
+        if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
+        {
+            /// Should be done in PipelineExecutor.
+            /// It is here for compatibility with IBlockInputStream.
+            CurrentThread::updatePerformanceCounters();
+            last_profile_events_update_time = total_elapsed_microseconds;
+        }
+
+        /// Should be done in PipelineExecutor.
+        /// It is here for compatibility with IBlockInputStream.
+        limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+
+        if (quota != nullptr && limits.mode == LimitsMode::LIMITS_TOTAL)
+        {
+            quota->checkAndAddReadRowsBytes(time(nullptr), value.read_rows, value.read_bytes);
+        }
+    }
+}
+
+}
diff --git a/dbms/src/Processors/Sources/SourceWithProgress.h b/dbms/src/Processors/Sources/SourceWithProgress.h
new file mode 100644
index 00000000000..833e5eccb6f
--- /dev/null
+++ b/dbms/src/Processors/Sources/SourceWithProgress.h
@@ -0,0 +1,75 @@
+#pragma once
+#include <Processors/ISource.h>
+#include <Common/Stopwatch.h>
+#include <DataStreams/IBlockInputStream.h>
+
+namespace DB
+{
+
+/// Adds progress to ISource.
+/// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread.
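+/// ISourceWithProgress only declares the interface; SourceWithProgress below is the
+/// stateful default implementation.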
+class ISourceWithProgress : public ISource
+{
+public:
+    using ISource::ISource;
+
+    using LocalLimits = IBlockInputStream::LocalLimits;
+    using LimitsMode = IBlockInputStream::LimitsMode;
+
+    /// Set limitations that are checked on each chunk.
+    virtual void setLimits(const LocalLimits & limits_) = 0;
+
+    /// Set the quota. If you set a quota on the amount of raw data,
+    /// then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits.
+    virtual void setQuota(QuotaForIntervals & quota_) = 0;
+
+    /// Set the pointer to the process list item.
+    /// General information about the resources spent on the query will be written into it.
+    /// Based on this information, the quota and some restrictions will be checked.
+    /// This information will also be available in the SHOW PROCESSLIST query.
+    virtual void setProcessListElement(QueryStatus * elem) = 0;
+
+    /// Set the execution progress bar callback.
+    /// It is called after each chunk.
+    /// The function takes the number of rows in the last chunk and the number of bytes in the last chunk.
+    /// Note that the callback can be called from different threads.
+    virtual void setProgressCallback(const ProgressCallback & callback) = 0;
+
+    /// Set the approximate total number of rows to read.
+    virtual void addTotalRowsApprox(size_t value) = 0;
+};
+
+/// Implementation for ISourceWithProgress.
+class SourceWithProgress : public ISourceWithProgress
+{
+public:
+    using ISourceWithProgress::ISourceWithProgress;
+
+    using LocalLimits = IBlockInputStream::LocalLimits;
+    using LimitsMode = IBlockInputStream::LimitsMode;
+
+    void setLimits(const LocalLimits & limits_) final { limits = limits_; }
+    void setQuota(QuotaForIntervals & quota_) final { quota = &quota_; }
+    void setProcessListElement(QueryStatus * elem) final { process_list_elem = elem; }
+    void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; }
+    void addTotalRowsApprox(size_t value) final { total_rows_approx += value; }
+
+protected:
+    /// Call this method to provide information about progress.
+    void progress(const Progress & value);
+
+private:
+    LocalLimits limits;
+    QuotaForIntervals * quota = nullptr;
+    ProgressCallback progress_callback;
+    QueryStatus * process_list_elem = nullptr;
+
+    /// The approximate total number of rows to read. For the progress bar.
+    size_t total_rows_approx = 0;
+
+    Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE};    /// Time with waiting time.
+    /// According to total_stopwatch in microseconds.
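+    /// Updated from progress() at most once per 10 ms (profile_events_update_period_microseconds).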
+    UInt64 last_profile_events_update_time = 0;
+};
+
+}
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
index 77f33dce01d..17c5e4609c7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
@@ -3,9 +3,7 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
 
@@ -32,7 +30,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor(
     bool save_marks_in_cache_,
     const Names & virt_column_names_)
     :
-    ISource(getHeader(std::move(header), prewhere_info_, virt_column_names_)),
+    SourceWithProgress(getHeader(std::move(header), prewhere_info_, virt_column_names_)),
     storage(storage_),
     prewhere_info(prewhere_info_),
     max_block_size_rows(max_block_size_rows_),
@@ -176,7 +174,7 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl()
 
     UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows;
 
-    /// TODO: progressImpl({ read_result.numReadRows(), read_result.numBytesRead() });
+    progress({ read_result.numReadRows(), read_result.numBytesRead() });
 
     if (task->size_predictor)
     {
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
index db369a5f267..22692271e58 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
@@ -5,7 +5,7 @@
 #include
 #include
 
-#include <Processors/ISource.h>
+#include <Processors/Sources/SourceWithProgress.h>
 
 namespace DB
 {
@@ -16,7 +16,7 @@ class MarkCache;
 
 /// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream
-class MergeTreeBaseSelectProcessor : public ISource
+class MergeTreeBaseSelectProcessor : public SourceWithProgress
 {
 public:
     MergeTreeBaseSelectProcessor(
@@ -39,7 +39,7 @@ public:
 protected:
     Chunk generate() final;
 
-    /// Creates new this->task, and initilizes readers
+    /// Creates new this->task, and initializes readers.
     virtual bool getNewTask() = 0;
     virtual Chunk readFromPart();
 
@@ -52,8 +52,6 @@ protected:
 
     void initializeRangeReaders(MergeTreeReadTask & task);
 
-    size_t estimateNumRows(MergeTreeReadTask & current_task, MergeTreeRangeReader & current_reader);
-
 protected:
     const MergeTreeData & storage;
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 7df4178f58f..5d5f0057121 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -749,17 +749,18 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
 
         for (size_t i = 0; i < num_streams; ++i)
         {
-            res.push_back({std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
+            auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
                 i, pool, min_marks_for_concurrent_read, max_block_size,
                 settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
                 data, use_uncompressed_cache,
-                query_info.prewhere_info, settings, virt_columns)});
+                query_info.prewhere_info, settings, virt_columns);
 
             if (i == 0)
             {
                 /// Set the approximate number of rows for the first source only
-                /// TODO
-                /// res.front()->addTotalRowsApprox(total_rows);
+                source->addTotalRowsApprox(total_rows);
             }
+
+            res.push_back({std::move(source)});
         }
     }
     else if (sum_marks > 0)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
index ff8d599135b..af8c02318d7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
@@ -1,7 +1,6 @@
 #include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
 #include
 #include
-#include
 
 namespace DB
 
@@ -39,7 +38,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
     size_t preferred_block_size_bytes_,
     size_t preferred_max_column_in_block_size_bytes_,
     Names required_columns_,
-    const MarkRanges & mark_ranges_,
+    MarkRanges mark_ranges_,
     bool use_uncompressed_cache_,
     const PrewhereInfoPtr & prewhere_info_,
     bool check_columns,
@@ -55,10 +54,10 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
         storage_, prewhere_info_, max_block_size_rows_,
         preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_,
         max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_},
-    required_columns{required_columns_},
+    required_columns{std::move(required_columns_)},
     data_part{owned_data_part_},
     part_columns_lock(data_part->columns_lock),
-    all_mark_ranges(mark_ranges_),
+    all_mark_ranges(std::move(mark_ranges_)),
     part_index_in_query(part_index_in_query_),
     path(data_part->getFullPath())
 {
@@ -76,8 +75,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
         : "")
         << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
 
-    /// TODO
-    /// addTotalRowsApprox(total_rows);
+    addTotalRowsApprox(total_rows);
 
     ordered_names = header_without_virtual_columns.getNames();
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
index dcba0ca5e36..58202988e4c 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
@@ -23,7 +23,7 @@ public:
         size_t preferred_block_size_bytes,
         size_t preferred_max_column_in_block_size_bytes,
         Names column_names,
-        const MarkRanges & mark_ranges,
+        MarkRanges mark_ranges,
         bool
use_uncompressed_cache, const PrewhereInfoPtr & prewhere_info, bool check_columns, diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 04954d6ff82..51ed337367d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -39,7 +38,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( size_t preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, Names required_columns_, - const MarkRanges & mark_ranges_, + MarkRanges mark_ranges_, bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, bool check_columns_, @@ -58,7 +57,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( required_columns{std::move(required_columns_)}, data_part{owned_data_part_}, part_columns_lock(data_part->columns_lock), - all_mark_ranges(mark_ranges_), + all_mark_ranges(std::move(mark_ranges_)), part_index_in_query(part_index_in_query_), check_columns(check_columns_), path(data_part->getFullPath()) @@ -77,9 +76,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( : "") << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin)); - /// TODO - /// addTotalRowsApprox(total_rows); - + addTotalRowsApprox(total_rows); ordered_names = header_without_virtual_columns.getNames(); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 0551d966481..c0d93842a81 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -23,7 +23,7 @@ public: size_t preferred_block_size_bytes, size_t preferred_max_column_in_block_size_bytes, Names column_names_, - const MarkRanges & mark_ranges, + MarkRanges mark_ranges, bool use_uncompressed_cache, const PrewhereInfoPtr & prewhere_info, bool check_columns, From d4f11af8175c15df419724184e27266c3f8b3413 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 4 Oct 2019 20:46:48 +0300 Subject: [PATCH 017/222] Update QueryPipeline. 
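Route progress and process-list wiring through the new ISourceWithProgress interface instead of enumerating concrete source classes. A hypothetical helper sketch of the dispatch (illustrative only, not part of this diff):

    static void setProgressCallbackForSources(Processors & processors, const ProgressCallback & callback)
    {
        for (auto & processor : processors)
            if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
                source->setProgressCallback(callback); /// covers SourceFromInputStream, MergeTree sources, etc.
    }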
--- dbms/src/Processors/QueryPipeline.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Processors/QueryPipeline.cpp b/dbms/src/Processors/QueryPipeline.cpp
index 6fbc3bb8ff1..06fed2ac3fc 100644
--- a/dbms/src/Processors/QueryPipeline.cpp
+++ b/dbms/src/Processors/QueryPipeline.cpp
@@ -515,8 +515,8 @@ void QueryPipeline::setProgressCallback(const ProgressCallback & callback)
 {
     for (auto & processor : processors)
     {
-        if (auto * source = typeid_cast<SourceFromInputStream *>(processor.get()))
-            source->getStream().setProgressCallback(callback);
+        if (auto * source = typeid_cast<ISourceWithProgress *>(processor.get()))
+            source->setProgressCallback(callback);
 
         if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
             source->setProgressCallback(callback);
@@ -527,8 +527,8 @@ void QueryPipeline::setProcessListElement(QueryStatus * elem)
 {
     for (auto & processor : processors)
     {
-        if (auto * source = typeid_cast<SourceFromInputStream *>(processor.get()))
-            source->getStream().setProcessListElement(elem);
+        if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
+            source->setProcessListElement(elem);
 
         if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
             source->setProcessListElement(elem);
 
From c7bb83262ecfa48c72f04ccd1cd048d75dedb3af Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 9 Oct 2019 12:33:16 +0300
Subject: [PATCH 018/222] Fix progress callback for processors pipeline.

---
 dbms/src/Processors/QueryPipeline.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Processors/QueryPipeline.cpp b/dbms/src/Processors/QueryPipeline.cpp
index 06fed2ac3fc..445e12a2a2d 100644
--- a/dbms/src/Processors/QueryPipeline.cpp
+++ b/dbms/src/Processors/QueryPipeline.cpp
@@ -515,7 +515,7 @@ void QueryPipeline::setProgressCallback(const ProgressCallback & callback)
 {
     for (auto & processor : processors)
     {
-        if (auto * source = typeid_cast<ISourceWithProgress *>(processor.get()))
+        if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
             source->setProgressCallback(callback);
 
         if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
             source->setProgressCallback(callback);
 
From ea27918de87ce0d29bc4778583b591b76f6ff5e3 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 9 Oct 2019 12:40:30 +0300
Subject: [PATCH 019/222] Try fix progressbar.

---
 dbms/src/Processors/Sources/SourceWithProgress.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp
index d3487faae3a..1e63003acef 100644
--- a/dbms/src/Processors/Sources/SourceWithProgress.cpp
+++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp
@@ -16,7 +16,12 @@ void SourceWithProgress::progress(const Progress & value)
 {
     if (total_rows_approx != 0 && process_list_elem)
     {
-        process_list_elem->updateProgressIn({0, 0, total_rows_approx});
+        Progress total_rows_progress = {0, 0, total_rows_approx};
+
+        if (progress_callback)
+            progress_callback(total_rows_progress);
+
+        process_list_elem->updateProgressIn(total_rows_progress);
         total_rows_approx = 0;
     }
 
From eb2677c94ef1abf005a331172ded06d7b7a1882b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 9 Oct 2019 12:44:24 +0300
Subject: [PATCH 020/222] Try fix progressbar.
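Patch 019 still flushed the one-shot total-rows estimate only when a process-list element was attached; reorder the checks so the progress callback always receives it. The intended shape of the guard (same logic as the hunk below, shown flat for readability):

    if (total_rows_approx != 0)
    {
        Progress total_rows_progress = {0, 0, total_rows_approx};

        if (progress_callback)
            progress_callback(total_rows_progress); /// now fires even without a process-list element

        if (process_list_elem)
            process_list_elem->updateProgressIn(total_rows_progress);

        total_rows_approx = 0;
    }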
--- dbms/src/Processors/Sources/SourceWithProgress.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp index 1e63003acef..57bf6f2dca0 100644 --- a/dbms/src/Processors/Sources/SourceWithProgress.cpp +++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp @@ -14,14 +14,16 @@ namespace ErrorCodes void SourceWithProgress::progress(const Progress & value) { - if (total_rows_approx != 0 && process_list_elem) + if (total_rows_approx != 0) { Progress total_rows_progress = {0, 0, total_rows_approx}; if (progress_callback) progress_callback(total_rows_progress); - process_list_elem->updateProgressIn(total_rows_progress); + if (process_list_elem) + process_list_elem->updateProgressIn(total_rows_progress); + total_rows_approx = 0; } From dea89cfc11c79150b073302735450a3d47ffbae4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 9 Oct 2019 19:42:13 +0300 Subject: [PATCH 021/222] Disable processors by default. --- dbms/src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 8f2474982a0..1d2cb2e6416 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -359,7 +359,7 @@ struct Settings : public SettingsCollection M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only for 'mysql' table function.") \ M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \ \ - M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \ + M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \ \ M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \ M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \ From 4728bdfccd57517e692fb2cbe6da204d4e3e3e81 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Oct 2019 14:20:25 +0300 Subject: [PATCH 022/222] Fix MergeTreeSequentialBlockInputStream. --- .../MergeTree/MergeTreeSequentialBlockInputStream.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp index eeeb07f1c26..7b5ca701287 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp @@ -107,12 +107,16 @@ try if (should_evaluate_missing_defaults) reader->evaluateMissingDefaults({}, columns); + res = header.cloneEmpty(); + /// Reorder columns and fill result block. 
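+    /// ('sample' lists the physically read columns, whose order may differ from the header's, hence the lookup by name below.)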
 size_t num_columns = sample.size();
 auto it = sample.begin();
 for (size_t i = 0; i < num_columns; ++i)
 {
-    res.insert({columns[i], it->type, it->name});
+    if (header.has(it->name))
+        header.getByName(it->name).column = std::move(columns[i]);
+
     ++it;
 }
 
From 378052743d04ed71b7a485d948c69ab4f0f3a8aa Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 10 Oct 2019 14:24:29 +0300
Subject: [PATCH 023/222] Fix MergeTreeSequentialBlockInputStream.

---
 .../MergeTree/MergeTreeSequentialBlockInputStream.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp
index 7b5ca701287..081ad289d28 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp
@@ -114,8 +114,8 @@ try
     auto it = sample.begin();
     for (size_t i = 0; i < num_columns; ++i)
     {
-        if (header.has(it->name))
-            header.getByName(it->name).column = std::move(columns[i]);
+        if (res.has(it->name))
+            res.getByName(it->name).column = std::move(columns[i]);
 
         ++it;
     }
 
From ef14df4632450a431d18b7201b9588d5045a1574 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 10 Oct 2019 17:16:15 +0300
Subject: [PATCH 024/222] Added more comments.

---
 dbms/src/DataStreams/ExecutionSpeedLimits.h          |  1 +
 dbms/src/DataStreams/IBlockInputStream.cpp           |  8 ++++----
 dbms/src/DataStreams/IBlockInputStream.h             |  2 +-
 dbms/src/Interpreters/InterpreterSelectQuery.cpp     | 12 ++++++------
 dbms/src/Processors/Executors/TreeExecutor.cpp       |  2 ++
 dbms/src/Processors/Executors/TreeExecutor.h         |  9 +++++++++
 dbms/src/Processors/Sources/SourceFromInputStream.h  |  1 +
 dbms/src/Processors/Sources/SourceWithProgress.cpp   | 12 +++++++++---
 .../Transforms/LimitsCheckingTransform.cpp           |  6 +++---
 dbms/src/Processors/Transforms/ReverseTransform.h    |  1 +
 dbms/src/Storages/IStorage.h                         |  4 ++++
 dbms/src/Storages/Kafka/StorageKafka.cpp             |  2 +-
 .../MergeTree/MergeTreeBaseSelectProcessor.h         |  5 ++++-
 .../MergeTreeThreadSelectBlockInputProcessor.cpp     | 11 ++++++-----
 14 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.h b/dbms/src/DataStreams/ExecutionSpeedLimits.h
index 67627cb36bf..6dbc2e5c687 100644
--- a/dbms/src/DataStreams/ExecutionSpeedLimits.h
+++ b/dbms/src/DataStreams/ExecutionSpeedLimits.h
@@ -20,6 +20,7 @@ public:
     /// Verify that the speed is not too low after the specified time has elapsed.
     Poco::Timespan timeout_before_checking_execution_speed = 0;
 
+    /// Pause execution if speed limits are exceeded.
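+    /// Throws TOO_SLOW when the query is below the minimum speed; sleeps when it is above the maximum.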
     void throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds);
 };
 
diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp
index 447b595d438..2e30749e89f 100644
--- a/dbms/src/DataStreams/IBlockInputStream.cpp
+++ b/dbms/src/DataStreams/IBlockInputStream.cpp
@@ -219,11 +219,11 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
 
 bool IBlockInputStream::checkTimeLimit()
 {
-    if (limits.speed_limit.max_execution_time != 0
-        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000)
+    if (limits.speed_limits.max_execution_time != 0
+        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limits.max_execution_time.totalMicroseconds()) * 1000)
         return handleOverflowMode(limits.timeout_overflow_mode,
             "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds())
-            + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0),
+            + " seconds, maximum: " + toString(limits.speed_limits.max_execution_time.totalMicroseconds() / 1000000.0),
             ErrorCodes::TIMEOUT_EXCEEDED);
 
     return true;
@@ -289,7 +289,7 @@ void IBlockInputStream::progressImpl(const Progress & value)
             last_profile_events_update_time = total_elapsed_microseconds;
         }
 
-        limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+        limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
 
         if (quota != nullptr && limits.mode == LIMITS_TOTAL)
         {
diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h
index 059e73f6db9..4f945001686 100644
--- a/dbms/src/DataStreams/IBlockInputStream.h
+++ b/dbms/src/DataStreams/IBlockInputStream.h
@@ -202,7 +202,7 @@ public:
 
         SizeLimits size_limits;
 
-        ExecutionSpeedLimits speed_limit;
+        ExecutionSpeedLimits speed_limits;
 
         OverflowMode timeout_overflow_mode = OverflowMode::THROW;
     };
 
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index ac73d888116..ff67bc170e9 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1587,7 +1587,7 @@ void InterpreterSelectQuery::executeFetchColumns(
         IBlockInputStream::LocalLimits limits;
         limits.mode = IBlockInputStream::LIMITS_TOTAL;
         limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode);
-        limits.speed_limit.max_execution_time = settings.max_execution_time;
+        limits.speed_limits.max_execution_time = settings.max_execution_time;
         limits.timeout_overflow_mode = settings.timeout_overflow_mode;
 
        /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers,
@@ -1599,11 +1599,11 @@
         */
        if (options.to_stage == QueryProcessingStage::Complete)
        {
-            limits.speed_limit.min_execution_speed = settings.min_execution_speed;
-            limits.speed_limit.max_execution_speed = settings.max_execution_speed;
-            limits.speed_limit.min_execution_speed_bytes = settings.min_execution_speed_bytes;
-            limits.speed_limit.max_execution_speed_bytes = settings.max_execution_speed_bytes;
-            limits.speed_limit.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
+            limits.speed_limits.min_execution_speed = settings.min_execution_speed;
+            limits.speed_limits.max_execution_speed = settings.max_execution_speed;
+            limits.speed_limits.min_execution_speed_bytes = settings.min_execution_speed_bytes;
+            limits.speed_limits.max_execution_speed_bytes = settings.max_execution_speed_bytes;
+            limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
        }
 
        QuotaForIntervals & quota = context.getQuota();
 
diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp
index 469b1c36eb2..94e2dfe5b5a 100644
--- a/dbms/src/Processors/Executors/TreeExecutor.cpp
+++ b/dbms/src/Processors/Executors/TreeExecutor.cpp
@@ -14,6 +14,8 @@ static void checkProcessorHasSingleOutput(IProcessor * processor)
             ErrorCodes::LOGICAL_ERROR);
 }
 
+/// Check tree invariants (described in TreeExecutor.h).
+/// Collect sources with progress.
 static void validateTree(const Processors & processors, IProcessor * root, std::vector<ISourceWithProgress *> & sources)
 {
     std::unordered_map<IProcessor *, size_t> index;
 
diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h
index 4af989240c2..51fc82200b8 100644
--- a/dbms/src/Processors/Executors/TreeExecutor.h
+++ b/dbms/src/Processors/Executors/TreeExecutor.h
@@ -7,9 +7,17 @@ namespace DB
 
 class ISourceWithProgress;
 
+/// It's a wrapper that turns a tree-shaped pipeline of processors into a block input stream.
+/// Executes all processors in a single thread, by an in-order tree traversal.
+/// Also supports progress and quotas.
 class TreeExecutor : public IBlockInputStream
 {
 public:
+    /// The last processor in the list must be the tree root.
+    /// It is checked that
+    ///  * processors form a tree
+    ///  * all processors are attainable from the root
+    ///  * there are no other connected processors
     explicit TreeExecutor(Processors processors_) : processors(std::move(processors_)) { init(); }
 
     String getName() const override { return root->getName(); }
@@ -35,6 +43,7 @@ private:
     std::vector<ISourceWithProgress *> sources_with_progress;
 
     void init();
+    /// Execute the tree step by step until the root returns the next chunk or execution is finished.
     void execute();
 };
 
diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.h b/dbms/src/Processors/Sources/SourceFromInputStream.h
index 6f8a7fcd2d1..888439f15d5 100644
--- a/dbms/src/Processors/Sources/SourceFromInputStream.h
+++ b/dbms/src/Processors/Sources/SourceFromInputStream.h
@@ -7,6 +7,7 @@ namespace DB
 class IBlockInputStream;
 using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
 
+/// Wrapper for IBlockInputStream which implements ISourceWithProgress.
 class SourceFromInputStream : public ISourceWithProgress
 {
 public:
diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp
index 57bf6f2dca0..21f9d5ca9bb 100644
--- a/dbms/src/Processors/Sources/SourceWithProgress.cpp
+++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp
@@ -12,6 +12,8 @@ namespace ErrorCodes
     extern const int TOO_MANY_BYTES;
 }
 
+/// Aggregated copy-paste from IBlockInputStream::progressImpl.
+/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream.
 void SourceWithProgress::progress(const Progress & value)
 {
     if (total_rows_approx != 0)
@@ -35,13 +37,17 @@ void SourceWithProgress::progress(const Progress & value)
         if (!process_list_elem->updateProgressIn(value))
             cancel();
 
-        /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers.
+        /// The total amount of data processed or intended for processing in all sources, possibly on remote servers.
 
         ProgressValues progress = process_list_elem->getProgressIn();
         size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read);
 
-        /// Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read.
+        /// Check the restrictions on the
+        ///  * amount of data to read
+        ///  * speed of the query
+        ///  * quota on the amount of data to read
         /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
+
         if (limits.mode == LimitsMode::LIMITS_TOTAL)
         {
             if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read",
@@ -64,7 +70,7 @@
 
         /// Should be done in PipelineExecutor.
         /// It is here for compatibility with IBlockInputStream.
-        limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+        limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
 
         if (quota != nullptr && limits.mode == LimitsMode::LIMITS_TOTAL)
         {
diff --git a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp
index 094181d9cdb..4947d11974b 100644
--- a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp
+++ b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp
@@ -80,11 +80,11 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
 
 bool LimitsCheckingTransform::checkTimeLimit()
 {
-    if (limits.speed_limit.max_execution_time != 0
-        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000)
+    if (limits.speed_limits.max_execution_time != 0
+        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limits.max_execution_time.totalMicroseconds()) * 1000)
         return handleOverflowMode(limits.timeout_overflow_mode,
             "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds())
-            + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0),
+            + " seconds, maximum: " + toString(limits.speed_limits.max_execution_time.totalMicroseconds() / 1000000.0),
             ErrorCodes::TIMEOUT_EXCEEDED);
 
     return true;
diff --git a/dbms/src/Processors/Transforms/ReverseTransform.h b/dbms/src/Processors/Transforms/ReverseTransform.h
index 2e3eca25648..6450fbbae47 100644
--- a/dbms/src/Processors/Transforms/ReverseTransform.h
+++ b/dbms/src/Processors/Transforms/ReverseTransform.h
@@ -4,6 +4,7 @@
 namespace DB
 {
 
+/// Reverse rows in chunk.
 class ReverseTransform : public ISimpleTransform
 {
 public:
diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h
index 3bd494fdb4a..b224f84be97 100644
--- a/dbms/src/Storages/IStorage.h
+++ b/dbms/src/Storages/IStorage.h
@@ -238,6 +238,8 @@ public:
       * if the storage can return a different number of streams.
       *
       * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
+      *
+      * Default implementation calls `readWithProcessors` and wraps into TreeExecutor.
       */
     virtual BlockInputStreams read(
         const Names & /*column_names*/,
@@ -247,6 +249,8 @@
         size_t /*max_block_size*/,
         unsigned /*num_streams*/);
 
+    /** The same as read, but returns processors.
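+      * Processors can be executed in parallel by a PipelineExecutor, or wrapped back into streams with TreeExecutor (as the default read() does).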
+ */ virtual Pipes readWithProcessors( const Names & /*column_names*/, const SelectQueryInfo & /*query_info*/, diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 46b6ac7e2d6..c0109f337c9 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -388,7 +388,7 @@ bool StorageKafka::streamToViews() // Limit read batch to maximum block size to allow DDL IBlockInputStream::LocalLimits limits; - limits.speed_limit.max_execution_time = settings.stream_flush_interval_ms; + limits.speed_limits.max_execution_time = settings.stream_flush_interval_ms; limits.timeout_overflow_mode = OverflowMode::BREAK; stream->setLimits(limits); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 22692271e58..7f3367b74c8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -15,7 +15,7 @@ class UncompressedCache; class MarkCache; -/// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream +/// Base class for MergeTreeThreadSelectProcessor and MergeTreeSelectProcessor class MergeTreeBaseSelectProcessor : public SourceWithProgress { public: @@ -46,8 +46,10 @@ protected: Chunk readFromPartImpl(); + /// Two versions for header and chunk. static void injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns); static void injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns); + static Block getHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns); void initializeRangeReaders(MergeTreeReadTask & task); @@ -68,6 +70,7 @@ protected: bool save_marks_in_cache; Names virt_column_names; + /// This header is used for chunks from readFromPart(). Block header_without_virtual_columns; std::unique_ptr task; diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index 78122c53ac1..cc090833f1e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -20,11 +20,12 @@ MergeTreeThreadSelectBlockInputProcessor::MergeTreeThreadSelectBlockInputProcess const Settings & settings, const Names & virt_column_names_) : - MergeTreeBaseSelectProcessor{pool_->getHeader(), storage_, prewhere_info_, max_block_size_rows_, - preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, - thread{thread_}, - pool{pool_} + MergeTreeBaseSelectProcessor{pool_->getHeader(), storage_, prewhere_info_, max_block_size_rows_, + preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, + settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, + use_uncompressed_cache_, true, virt_column_names_}, + thread{thread_}, + pool{pool_} { /// round min_marks_to_read up to nearest multiple of block_size expressed in marks /// If granularity is adaptive it doesn't make sense From 89dfe7882d3fae32ee33434e7635f99f3afc4178 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 11 Oct 2019 11:55:00 +0300 Subject: [PATCH 025/222] Enable processors by default. 
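For reference, an approximate sketch of the call site that this setting gates (roughly as in executeQuery.cpp of this era; names are approximate and not part of the diff):

    bool use_processors = settings.experimental_use_processors && interpreter->canExecuteWithProcessors();

    if (use_processors)
        pipeline = interpreter->executeWithProcessors(); /// QueryPipeline path
    else
        res = interpreter->execute();                    /// classic IBlockInputStream path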
--- dbms/src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index 1d2cb2e6416..8f2474982a0 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -359,7 +359,7 @@ struct Settings : public SettingsCollection
     M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only for 'mysql' table function.") \
     M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
     \
-    M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \
+    M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \
     \
     M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
     M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \
 
From 7c2575542b63ddbb8fdabd6df1ecaf8d89183e45 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Sat, 12 Oct 2019 21:52:20 +0300
Subject: [PATCH 026/222] Fix build.

---
 dbms/src/Processors/Transforms/ReverseTransform.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dbms/src/Processors/Transforms/ReverseTransform.cpp b/dbms/src/Processors/Transforms/ReverseTransform.cpp
index eb2b39d26d1..e2fb66411aa 100644
--- a/dbms/src/Processors/Transforms/ReverseTransform.cpp
+++ b/dbms/src/Processors/Transforms/ReverseTransform.cpp
@@ -1,4 +1,5 @@
 #include <Processors/Transforms/ReverseTransform.h>
+#include
 
 namespace DB
 {
From 436e87a8edf6685bf52e3ee0af79578f2bcc6c5e Mon Sep 17 00:00:00 2001
From: FeehanG <51821376+FeehanG@users.noreply.github.com>
Date: Mon, 14 Oct 2019 14:03:23 +0300
Subject: [PATCH 027/222] Update parametric_functions.md

---
 .../agg_functions/parametric_functions.md | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md
index 13cbc2b05d8..a044f7d97be 100644
--- a/docs/en/query_language/agg_functions/parametric_functions.md
+++ b/docs/en/query_language/agg_functions/parametric_functions.md
@@ -73,7 +73,7 @@ In this case, you should remember that you don't know the histogram bin borders.
 
 ## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch}
 
-Checks whether the sequence contains the event chain that matches the pattern.
+Checks whether the sequence contains an event chain that matches the pattern.
 
 ```sql
 sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
 ```
 
@@ -87,9 +87,9 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
 
 - `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
 
-- `timestamp` — Column that considered to contain time data. Typical data types are `Date`, and `DateTime`. You can use also any of the supported [UInt](../../data_types/int_uint.md) data types.
+- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes into account only the events described in these conditions. If the sequence contains data that are not described with conditions the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described under these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
The function takes into account only the events described in these conditions. If the sequence contains data that are not described with conditions the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described under these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them. **Returned values** @@ -104,11 +104,11 @@ Type: `UInt8`. **Pattern syntax** -- `(?N)` — Matches the condition argument at the position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. +- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. -- `.*` — Matches any number of any events. You don't need the conditional arguments to match this element of the pattern. +- `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern. -- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that distanced from each other for more than 1800 seconds. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators. +- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators. **Examples** @@ -133,7 +133,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) FROM t └───────────────────────────────────────────────────────────────────────┘ ``` -The function has found the event chain where number 2 follows number 1. It skipped number 3 between them, because the number is not described as an event. If we want to take this number into account when searching for the event chain, showed in the example, we should make a condition for it. +The function found the event chain where number 2 follows number 1. It skipped number 3 between them, because the number is not described as an event. If we want to take this number into account when searching for the event chain given in the example, we should make a condition for it. ```sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM t @@ -144,7 +144,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM └──────────────────────────────────────────────────────────────────────────────────────────┘ ``` -In this case the function couldn't find the event chain matching the pattern, because there is the event for number 3 occured between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern. +In this case, the function couldn't find the event chain matching the pattern, because the event for number 3 occured between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern. ```sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM t @@ -163,7 +163,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM ## sequenceCount(pattern)(time, cond1, cond2, ...) 
 
-Counts the number of event chains that matched the pattern. The function searches event chains that not overlap. It starts to search for the next chain after the current chain is matched.
+Counts the number of event chains that matched the pattern. The function searches event chains that don't overlap. It starts to search for the next chain after the current chain is matched.
 
 !!! warning "Warning"
     Events that occur at the same second may lie in the sequence in an undefined order affecting the result.
 
 ```sql
 sequenceCount(pattern)(timestamp, cond1, cond2, ...)
 ```
 
@@ -176,14 +176,14 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
 
 - `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
 
-- `timestamp` — Column that considered to contain time data. Typical data types are `Date`, and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
+- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes into account only the events described in these conditions. If the sequence contains data that are not described with conditions the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
 
 **Returned values**
 
-- Number of non-overlapping event chains that are matched
+- Number of non-overlapping event chains that are matched.
 
 Type: `UInt64`.
 
@@ -230,7 +230,7 @@ windowFunnel(window)(timestamp, cond1, cond2, cond3, ...)
 
 **Parameters:**
 
 - `window` — Length of the sliding window in seconds.
-- `timestamp` — Name of the column containing the timestamp. Data type support: `Date`,`DateTime`, and other unsigned integer types (note that though timestamp support `UInt64` type, there is a limitation it's value can't overflow maximum of Int64, which is 2^63 - 1).
+- `timestamp` — Name of the column containing the timestamp. Data types supported: `Date`, `DateTime`, and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can't exceed the Int64 maximum, which is 2^63 - 1).
 - `cond1`, `cond2`... — Conditions or data describing the chain of events. Data type: `UInt8`. Values can be 0 or 1.
 
 **Algorithm**
 
From 5364f76625eb7392e7f04396a2aebce92f4a5810 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 14 Oct 2019 16:50:16 +0300
Subject: [PATCH 028/222] Fix build.
--- dbms/src/Processors/Transforms/ReverseTransform.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Processors/Transforms/ReverseTransform.cpp b/dbms/src/Processors/Transforms/ReverseTransform.cpp
index e2fb66411aa..98f2bf54aa5 100644
--- a/dbms/src/Processors/Transforms/ReverseTransform.cpp
+++ b/dbms/src/Processors/Transforms/ReverseTransform.cpp
@@ -1,5 +1,5 @@
 #include <Processors/Transforms/ReverseTransform.h>
-#include
+#include
 
 namespace DB
 {
From d1fb23882089157576de77ef0799e34150dc9891 Mon Sep 17 00:00:00 2001
From: memo
Date: Tue, 15 Oct 2019 15:43:05 +0800
Subject: [PATCH 029/222] implement arrayCompact

---
 dbms/src/Functions/array/arrayCompact.cpp | 141 ++++++++++++++++++
 .../registerFunctionsHigherOrder.cpp | 2 +
 2 files changed, 143 insertions(+)
 create mode 100644 dbms/src/Functions/array/arrayCompact.cpp

diff --git a/dbms/src/Functions/array/arrayCompact.cpp b/dbms/src/Functions/array/arrayCompact.cpp
new file mode 100644
index 00000000000..9f763a12ec3
--- /dev/null
+++ b/dbms/src/Functions/array/arrayCompact.cpp
@@ -0,0 +1,141 @@
+#include <DataTypes/DataTypesNumber.h>
+#include <Columns/ColumnsNumber.h>
+#include "FunctionArrayMapped.h"
+#include <Functions/FunctionFactory.h>
+
+
+namespace DB
+{
+
+    namespace ErrorCodes
+    {
+        extern const int ILLEGAL_COLUMN;
+        extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    }
+
+    struct ArrayCompactImpl
+    {
+        static bool needBoolean() { return false; }
+        static bool needExpression() { return false; }
+        static bool needOneArray() { return false; }
+
+        static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/)
+        {
+            WhichDataType which(expression_return);
+
+            if (which.isNativeUInt())
+                return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
+
+            if (which.isNativeInt())
+                return std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt64>());
+
+            if (which.isFloat())
+                return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());
+
+            throw Exception("arrayCompact cannot compact values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+
+
+        template <typename Element, typename Result>
+        static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
+        {
+            const ColumnVector<Element> * column = checkAndGetColumn<ColumnVector<Element>>(&*mapped);
+
+            if (!column)
+            {
+                const ColumnConst * column_const = checkAndGetColumnConst<ColumnVector<Element>>(&*mapped);
+
+                if (!column_const)
+                    return false;
+
+                const Element x = column_const->template getValue<Element>();
+                const IColumn::Offsets & offsets = array.getOffsets();
+                auto column_data = ColumnVector<Result>::create(column_const->size());
+                typename ColumnVector<Result>::Container & res_values = column_data->getData();
+                auto column_offsets = ColumnArray::ColumnOffsets::create(offsets.size());
+                IColumn::Offsets & res_offsets = column_offsets->getData();
+
+                size_t res_pos = 0;
+                size_t pos = 0;
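+                /// One pass over the arrays: every element of a constant column is equal, so each non-empty array compacts to a single value.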
+                for (size_t i = 0; i < offsets.size(); ++i)
+                {
+                    if (pos < offsets[i])
+                    {
+                        res_values[res_pos++] = x;
+                        pos = offsets[i];
+                    }
+                    res_offsets[i] = res_pos;
+                }
+                for(size_t i = 0; i < column_data->size() - res_pos; ++i)
+                {
+                    res_values.pop_back();
+                }
+                res_ptr = ColumnArray::create(std::move(column_data), std::move(column_offsets));
+                return true;
+            }
+
+            const IColumn::Offsets & offsets = array.getOffsets();
+            const typename ColumnVector<Element>::Container & data = column->getData();
+            auto column_data = ColumnVector<Result>::create(data.size());
+            typename ColumnVector<Result>::Container & res_values = column_data->getData();
+            auto column_offsets = ColumnArray::ColumnOffsets::create(offsets.size());
+            IColumn::Offsets & res_offsets = column_offsets->getData();
+
+            size_t res_pos = 0;
+            size_t pos = 0;
+            for (size_t i = 0; i < offsets.size(); ++i)
+            {
+                if (pos < offsets[i])
+                {
+                    /// Keep the first element of each array, then only elements that differ from their predecessor.
+                    res_values[res_pos] = data[pos];
+                    for (++pos, ++res_pos; pos < offsets[i]; ++pos)
+                    {
+                        if (data[pos] != data[pos - 1])
+                        {
+                            res_values[res_pos++] = data[pos];
+                        }
+                    }
+                }
+                res_offsets[i] = res_pos;
+            }
+            for(size_t i = 0; i < data.size() - res_pos; ++i)
+            {
+                res_values.pop_back();
+            }
+            res_ptr = ColumnArray::create(std::move(column_data), std::move(column_offsets));
+            return true;
+        }
+
+        static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
+        {
+            ColumnPtr res;
+
+            if (executeType< UInt8 , UInt64>(mapped, array, res) ||
+                executeType< UInt16, UInt64>(mapped, array, res) ||
+                executeType< UInt32, UInt64>(mapped, array, res) ||
+                executeType< UInt64, UInt64>(mapped, array, res) ||
+                executeType< Int8  ,  Int64>(mapped, array, res) ||
+                executeType< Int16 ,  Int64>(mapped, array, res) ||
+                executeType< Int32 ,  Int64>(mapped, array, res) ||
+                executeType< Int64 ,  Int64>(mapped, array, res) ||
+                executeType<Float32, Float64>(mapped, array, res) ||
+                executeType<Float64, Float64>(mapped, array, res))
+                return res;
+            else
+                throw Exception("Unexpected column for arrayCompact: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN);
+        }
+
+    };
+
+    struct NameArrayCompact { static constexpr auto name = "arrayCompact"; };
+    using FunctionArrayCompact = FunctionArrayMapped<ArrayCompactImpl, NameArrayCompact>;
+
+    void registerFunctionArrayCompact(FunctionFactory & factory)
+    {
+        factory.registerFunction<FunctionArrayCompact>();
+    }
+
+}
+
diff --git a/dbms/src/Functions/registerFunctionsHigherOrder.cpp b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
index e0948ebc913..c5f7f341a53 100644
--- a/dbms/src/Functions/registerFunctionsHigherOrder.cpp
+++ b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
@@ -8,6 +8,7 @@ void registerFunctionArrayFilter(FunctionFactory &);
 void registerFunctionArrayCount(FunctionFactory &);
 void registerFunctionArrayExists(FunctionFactory &);
 void registerFunctionArrayAll(FunctionFactory &);
+void registerFunctionArrayCompact(FunctionFactory &);
 void registerFunctionArraySum(FunctionFactory &);
 void registerFunctionArrayFirst(FunctionFactory &);
 void registerFunctionArrayFirstIndex(FunctionFactory &);
@@ -24,6 +25,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory)
     registerFunctionArrayCount(factory);
     registerFunctionArrayExists(factory);
     registerFunctionArrayAll(factory);
+    registerFunctionArrayCompact(factory);
     registerFunctionArraySum(factory);
     registerFunctionArrayFirst(factory);
     registerFunctionArrayFirstIndex(factory);
 
From 8b1b7f2a5f938a28d4eba37b3a7e915e056140f9 Mon Sep 17 00:00:00 2001
From: memo
Date: Tue, 15 Oct 2019 16:04:59 +0800
Subject: [PATCH 030/222] add arrayCompact test

---
 .../0_stateless/01020_function_array_compact.reference | 9 +++++++++
 .../queries/0_stateless/01020_function_array_compact.sql | 9 +++++++++
 2 files changed, 18 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/01020_function_array_compact.reference
 create mode 100644 dbms/tests/queries/0_stateless/01020_function_array_compact.sql

diff --git a/dbms/tests/queries/0_stateless/01020_function_array_compact.reference b/dbms/tests/queries/0_stateless/01020_function_array_compact.reference
new file mode 100644
index 00000000000..6627a437251
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01020_function_array_compact.reference
@@ -0,0 +1,9 @@
+[0]
+[1]
+[2]
+[1]
+[1,2]
+[1,2]
+[1,2,1]
+[2,1]
+[1,2,3,4,5]
diff --git a/dbms/tests/queries/0_stateless/01020_function_array_compact.sql b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql
new file mode 100644
index 00000000000..ac309fe3f0a
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql
@@ -0,0 +1,9 @@
+select arrayCompact([0]);
+select arrayCompact([1]);
+select arrayCompact([2]);
+select arrayCompact([1,1]);
+select arrayCompact([1,2]);
+select arrayCompact([1,1,2]);
+select arrayCompact([1,2,1]);
+select arrayCompact([2,1,1]);
+select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]);
From 947c345eda602c0308180e1092134bab8f41a32f Mon Sep 17 00:00:00 2001
From: memo
Date: Wed, 16 Oct 2019 14:05:43 +0800
Subject: [PATCH 031/222] style: Normalize the format

---
 dbms/src/Functions/array/arrayCompact.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Functions/array/arrayCompact.cpp b/dbms/src/Functions/array/arrayCompact.cpp
index 9f763a12ec3..0775c4cb7bb 100644
--- a/dbms/src/Functions/array/arrayCompact.cpp
+++ b/dbms/src/Functions/array/arrayCompact.cpp
@@ -6,7 +6,7 @@
 
 namespace DB
 {
-
+/// arrayCompact(['a', 'a', 'b', 'b', 'a']) = ['a', 'b', 'a'] - compact arrays
     namespace ErrorCodes
     {
         extern const int ILLEGAL_COLUMN;
@@ -68,7 +68,7 @@
                 }
                 res_offsets[i] = res_pos;
             }
-            for(size_t i = 0; i < column_data->size() - res_pos; ++i)
+            for (size_t i = 0; i < column_data->size() - res_pos; ++i)
             {
                 res_values.pop_back();
             }
@@ -92,7 +92,7 @@
                     res_values[res_pos] = data[pos];
                     for (++pos, ++res_pos; pos < offsets[i]; ++pos)
                     {
-                        if(data[pos] != data[pos - 1])
+                        if (data[pos] != data[pos - 1])
                         {
                             res_values[res_pos++] = data[pos];
                         }
@@ -100,7 +100,7 @@
                 res_offsets[i] = res_pos;
             }
-            for(size_t i = 0; i < data.size() - res_pos; ++i)
+            for (size_t i = 0; i < data.size() - res_pos; ++i)
             {
                 res_values.pop_back();
             }
From a2bf848e124a3118555362deaade2ce0f6eb0b13 Mon Sep 17 00:00:00 2001
From: "philip.han"
Date: Tue, 15 Oct 2019 13:22:51 +0900
Subject: [PATCH 032/222] Made bloom_filter type of index supporting LowCardinality and Nullable

---
 dbms/src/Interpreters/BloomFilter.cpp | 34 +++++
 dbms/src/Interpreters/BloomFilter.h | 6 +
 dbms/src/Interpreters/BloomFilterHash.h | 49 ++++--
 .../MergeTree/MergeTreeIndexBloomFilter.cpp | 10 +-
 .../MergeTreeIndexConditionBloomFilter.cpp | 12 +-
 .../00945_bloom_filter_index.reference | 105 +++++++++++++
 .../0_stateless/00945_bloom_filter_index.sql | 144 ++++++++++++++++++
 7 files changed, 335 insertions(+), 25 deletions(-)

diff --git a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp
index d648fd114f4..62897b6c774 100644
--- a/dbms/src/Interpreters/BloomFilter.cpp
+++ b/dbms/src/Interpreters/BloomFilter.cpp
@@ -1,5 +1,11 @@
 #include <Interpreters/BloomFilter.h>
 #include <city.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnLowCardinality.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
 
 
 namespace DB
@@ -83,4 +89,32 @@ bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed
     return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType)))));
 }
 
+const DataTypePtr getPrimitiveType(const DataTypePtr data_type)
+{
+    if (const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get()))
+        return getPrimitiveType(array_type->getNestedType());
+
+    if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))
+        return getPrimitiveType(nullable_type->getNestedType());
+
+    if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_type.get()))
+        return getPrimitiveType(low_cardinality_type->getDictionaryType());
+
+    return data_type;
+}
+
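+/// Column counterpart of getPrimitiveType: unwraps Array/Nullable/LowCardinality down to the underlying data column.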
+const ColumnPtr getPrimitiveColumn(const ColumnPtr column)
+{
+    if (const auto * array_col = typeid_cast<const ColumnArray *>(column.get()))
+        return getPrimitiveColumn(array_col->getDataPtr());
+
+    if (const auto * nullable_col = typeid_cast<const ColumnNullable *>(column.get()))
+        return getPrimitiveColumn(nullable_col->getNestedColumnPtr());
+
+    if (const auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(column.get()))
+        return getPrimitiveColumn(low_cardinality_col->convertToFullColumnIfLowCardinality());
+
+    return column;
+}
+
+}
diff --git a/dbms/src/Interpreters/BloomFilter.h b/dbms/src/Interpreters/BloomFilter.h
index 19469834c94..319a508e8c0 100644
--- a/dbms/src/Interpreters/BloomFilter.h
+++ b/dbms/src/Interpreters/BloomFilter.h
@@ -2,9 +2,12 @@
 
 #include
 #include
+#include <Columns/IColumn.h>
 #include
 #include
+#include <DataTypes/IDataType.h>
 #include
+#include
 
 namespace DB
 {
@@ -53,4 +56,7 @@ using BloomFilterPtr = std::shared_ptr<BloomFilter>;
 
 bool operator== (const BloomFilter & a, const BloomFilter & b);
 
+const DataTypePtr getPrimitiveType(const DataTypePtr data_type);
+const ColumnPtr getPrimitiveColumn(const ColumnPtr column);
+
 }
diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h
index 658f9790bee..0b458b6a7e9 100644
--- a/dbms/src/Interpreters/BloomFilterHash.h
+++ b/dbms/src/Interpreters/BloomFilterHash.h
@@ -10,9 +10,12 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -35,15 +38,38 @@ struct BloomFilterHash
         WhichDataType which(data_type);
 
         if (which.isUInt() || which.isDateOrDateTime())
-            return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet<UInt64>())), 1);
+            if (field.isNull() == false)
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet<UInt64>())), 1);
+            else
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(0)), 1);
         else if (which.isInt() || which.isEnum())
-            return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Int64>()))), 1);
+            if (field.isNull() == false)
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Int64>()))), 1);
+            else
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(0))), 1);
         else if (which.isFloat32() || which.isFloat64())
-            return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Float64>()))), 1);
+            if (field.isNull() == false)
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Float64>()))), 1);
+            else
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(0))), 1);
         else if (which.isString() || which.isFixedString())
         {
-            const auto & value = field.safeGet<String>();
-            return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
+            if (field.isNull() == false)
+            {
+                const auto & value = field.safeGet<String>();
+                return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
+            }
+            else
+            {
+                if (which.isString())
+                    return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64("", 0)), 1);
+                else
+                {
+                    const DataTypeFixedString * fixed_string_type = typeid_cast<const DataTypeFixedString *>(data_type);
+                    const String value(fixed_string_type->getN(), '\0'); /// zero-filled buffer (a variable-length array is not standard C++)
+                    return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
+                }
+            }
         }
         else
             throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
     }
 
     static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit)
     {
-        const IColumn * actual_col = column.get();
-        const IDataType * actual_type = data_type.get();
-
         WhichDataType which(data_type);
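+        /// For arrays, hash the underlying element column; the offset and limit are remapped from array rows to elements below.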
if (which.isArray()) { @@ -62,17 +85,17 @@ struct BloomFilterHash if (checkAndGetColumn(array_col->getData())) throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); - actual_col = array_col->getDataPtr().get(); - actual_type = static_cast(data_type.get())->getNestedType().get(); - const auto & offsets = array_col->getOffsets(); size_t offset = (pos == 0) ? 0 : offsets[pos - 1]; - limit = std::max(actual_col->size() - offset, limit); + limit = std::max(array_col->getDataPtr().get()->size() - offset, limit); } + const ColumnPtr actual_col = getPrimitiveColumn(column); + const DataTypePtr actual_type = getPrimitiveType(data_type); + auto index_column = ColumnUInt64::create(limit); ColumnUInt64::Container & index_column_vec = index_column->getData(); - getAnyTypeHash(actual_type, actual_col, index_column_vec, pos); + getAnyTypeHash(actual_type.get(), actual_col.get(), index_column_vec, pos); return index_column; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 56c754cf979..7815cce35f1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -74,13 +75,8 @@ static void assertIndexColumnsType(const Block & header) for (size_t index = 0; index < columns_data_types.size(); ++index) { - WhichDataType which(columns_data_types[index]); - - if (which.isArray()) - { - const DataTypeArray * array_type = typeid_cast(columns_data_types[index].get()); - which = WhichDataType(array_type->getNestedType()); - } + const IDataType * actual_type = getPrimitiveType(columns_data_types[index]).get(); + WhichDataType which(actual_type); if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() && !which.isDateOrDateTime() && !which.isEnum()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 56a18122f29..488abb7f6a8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -254,7 +254,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( size_t row_size = column->size(); size_t position = header.getPositionByName(key_ast->getColumnName()); const DataTypePtr & index_type = header.getByPosition(position).type; - const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context); + const auto & converted_column = castColumn(ColumnWithTypeAndName{getPrimitiveColumn(column), getPrimitiveType(type), ""}, index_type, context); out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); if (function_name == "in" || function_name == "globalIn") @@ -309,8 +309,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( if (!array_type) throw Exception("First argument for function has must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - Field converted_field = convertFieldToType(value_field, *array_type->getNestedType(), &*value_type); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*array_type->getNestedType(), converted_field))); + const DataTypePtr actual_type = getPrimitiveType(array_type->getNestedType()); + Field 
converted_field = convertFieldToType(value_field, *actual_type.get(), &*value_type); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field))); } else { @@ -318,8 +319,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( throw Exception("An array type of bloom_filter supports only has() function.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; - Field converted_field = convertFieldToType(value_field, *index_type, &*value_type); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*index_type, converted_field))); + const DataTypePtr actual_type = getPrimitiveType(index_type); + Field converted_field = convertFieldToType(value_field, *actual_type.get(), &*value_type); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field))); } return true; diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference index 332e97bf5a1..7e9362b5d33 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -28,6 +28,7 @@ 1 1 1 +100 1 1 1 @@ -70,3 +71,107 @@ 3 3 3 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +100 +1 +1 +1 +1 +100 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql index 03666dccd96..268574a609f 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -47,6 +47,8 @@ SELECT COUNT() FROM test.bloom_filter_types_test WHERE date_time = toDateTime('1 SELECT COUNT() FROM test.bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE str IN ( SELECT str FROM test.bloom_filter_types_test); + DROP TABLE IF EXISTS test.bloom_filter_types_test; DROP TABLE IF EXISTS test.bloom_filter_array_types_test; @@ -102,3 +104,145 @@ SELECT COUNT() FROM test.bloom_filter_array_types_test WHERE has(str, '10'); SELECT COUNT() FROM test.bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('10', 5)); DROP TABLE IF EXISTS test.bloom_filter_array_types_test; + +DROP TABLE IF EXISTS test.bloom_filter_null_types_test; + +CREATE TABLE test.bloom_filter_null_types_test (order_key UInt64, i8 Nullable(Int8), i16 Nullable(Int16), i32 Nullable(Int32), i64 Nullable(Int64), u8 Nullable(UInt8), u16 Nullable(UInt16), u32 Nullable(UInt32), u64 Nullable(UInt64), f32 Nullable(Float32), f64 Nullable(Float64), date Nullable(Date), date_time Nullable(DateTime('Europe/Moscow')), str Nullable(String), fixed_string Nullable(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_null_types_test 
SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Europe/Moscow') AS date, toDateTime(number, 'Europe/Moscow') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; +INSERT INTO test.bloom_filter_null_types_test SELECT 0 AS order_key, NULL AS i8, NULL AS i16, NULL AS i32, NULL AS i64, NULL AS u8, NULL AS u16, NULL AS u32, NULL AS u64, NULL AS f32, NULL AS f64, NULL AS date, NULL AS date_time, NULL AS str, NULL AS fixed_string; + +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; + +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i8); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i16); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i32); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i64); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u8); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u16); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u32); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u64); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(f32); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(f64); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(date); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(date_time); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(str); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(fixed_string); + +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE str IN ( SELECT str FROM test.bloom_filter_null_types_test); + +DROP TABLE IF EXISTS test.bloom_filter_null_types_test; + +DROP TABLE IF EXISTS test.bloom_filter_lc_null_types_test; + +CREATE TABLE 
test.bloom_filter_lc_null_types_test (order_key UInt64, str LowCardinality(Nullable(String)), fixed_string LowCardinality(Nullable(FixedString(5))), INDEX idx (str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_lc_null_types_test SELECT number AS order_key, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; +INSERT INTO test.bloom_filter_lc_null_types_test SELECT 0 AS order_key, NULL AS str, NULL AS fixed_string; + +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; + +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE isNull(str); +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE isNull(fixed_string); + +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE str IN ( SELECT str FROM test.bloom_filter_lc_null_types_test); + +DROP TABLE IF EXISTS test.bloom_filter_lc_null_types_test; + +DROP TABLE IF EXISTS test.bloom_filter_array_lc_null_types_test; + +CREATE TABLE test.bloom_filter_array_lc_null_types_test (order_key Array(LowCardinality(Nullable((UInt64)))), i8 Array(LowCardinality(Nullable((Int8)))), i16 Array(LowCardinality(Nullable((Int16)))), i32 Array(LowCardinality(Nullable((Int32)))), i64 Array(LowCardinality(Nullable((Int64)))), u8 Array(LowCardinality(Nullable((UInt8)))), u16 Array(LowCardinality(Nullable((UInt16)))), u32 Array(LowCardinality(Nullable((UInt32)))), u64 Array(LowCardinality(Nullable((UInt64)))), f32 Array(LowCardinality(Nullable((Float32)))), f64 Array(LowCardinality(Nullable((Float64)))), date Array(LowCardinality(Nullable((Date)))), date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), str Array(LowCardinality(Nullable((String)))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, 
groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT n AS order_key, n AS i8, n AS i16, n AS i32, n AS i64, n AS u8, n AS u16, n AS u32, n AS u64, n AS f32, n AS f64, n AS date, n AS date_time, n AS str, n AS fixed_string FROM (SELECT [NULL] AS n); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT [NULL, n] AS order_key, [NULL, toInt8(n)] AS i8, [NULL, toInt16(n)] AS i16, [NULL, toInt32(n)] AS i32, [NULL, toInt64(n)] AS i64, [NULL, toUInt8(n)] AS u8, [NULL, toUInt16(n)] AS u16, [NULL, toUInt32(n)] AS u32, [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, [NULL, toDate(n, 'Europe/Moscow')] AS date, [NULL, toDateTime(n, 'Europe/Moscow')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-02')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '1'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('1', 5)); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 5); +SELECT COUNT() FROM 
test.bloom_filter_array_lc_null_types_test WHERE has(u32, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-06')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '5'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('5', 5)); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-11')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '10'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('10', 5)); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, NULL); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 100); +SELECT COUNT() FROM 
test.bloom_filter_array_lc_null_types_test WHERE has(i64, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-04-11')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '100'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('100', 5)); + +DROP TABLE IF EXISTS test.bloom_filter_array_lc_null_types_test; From ef09cedbb821be5a8d06311ece6fda2047c2574a Mon Sep 17 00:00:00 2001 From: memo Date: Thu, 17 Oct 2019 11:38:49 +0800 Subject: [PATCH 033/222] fix test bug --- .../01020_function_array_compact.sql | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01020_function_array_compact.sql b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql index ac309fe3f0a..eea69dcb6da 100644 --- a/dbms/tests/queries/0_stateless/01020_function_array_compact.sql +++ b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql @@ -1,9 +1,9 @@ -select arrayCompact([0]) -select arrayCompact([1]) -select arrayCompact([2]) -select arrayCompact([1,1]) -select arrayCompact([1,2]) -select arrayCompact([1,1,2]) -select arrayCompact([1,2,1]) -select arrayCompact([2,1,1]) -select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]) +select arrayCompact([0]); +select arrayCompact([1]); +select arrayCompact([2]); +select arrayCompact([1,1]); +select arrayCompact([1,2]); +select arrayCompact([1,1,2]); +select arrayCompact([1,2,1]); +select arrayCompact([2,1,1]); +select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]); From 08a348fbd10e6b1bc7202728d7a7f4e6e1209ebc Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Thu, 17 Oct 2019 19:08:36 +0900 Subject: [PATCH 034/222] Made IN operator support Array(LowCardinality(Nullable(String))) --- dbms/src/Interpreters/Set.cpp | 3 ++- .../queries/0_stateless/00688_low_cardinality_in.reference | 1 + dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 68c219c3a91..330f0dc0287 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -424,7 +424,8 @@ void Set::checkColumnsNumber(size_t num_key_columns) const void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const { - if (!removeNullable(data_types[set_type_idx])->equals(*removeNullable(other_type))) + + if (!removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))->equals(*removeNullable(recursiveRemoveLowCardinality(other_type)))) throw Exception("Types of column " + toString(set_type_idx + 1) + " in section IN don't match: " + data_types[set_type_idx]->getName() + " on the right, " + other_type->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH); diff --git 
a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference index 74266c7f888..8edea4d363a 100644 --- a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference @@ -10,3 +10,4 @@ a 1 b 1 1 1 2 1 +['1'] diff --git a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql index 09a96743847..cb57fad51a4 100644 --- a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql @@ -9,3 +9,9 @@ select val, val in (select arrayJoin([1, 3])) from lc_00688; select str, str in (select str from lc_00688) from lc_00688; select val, val in (select val from lc_00688) from lc_00688; drop table if exists lc_00688; + +drop table if exists ary_lc_null; +CREATE TABLE ary_lc_null (i int, v Array(LowCardinality(Nullable(String)))) ENGINE = MergeTree() ORDER BY i ; +INSERT INTO ary_lc_null VALUES (1, ['1']); +SELECT v FROM ary_lc_null WHERE v IN (SELECT v FROM ary_lc_null); +drop table if exists ary_lc_null; From 8aac4d04de956c50d66fde4c6ece572906e013c9 Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Thu, 17 Oct 2019 19:53:41 +0900 Subject: [PATCH 035/222] Fixed weird type-casting for IN-operator in bloom_filter --- dbms/src/Functions/FunctionHelpers.cpp | 8 ++++++++ .../MergeTree/MergeTreeIndexConditionBloomFilter.cpp | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index 212a107e37c..4546b5dd8f0 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -4,8 +4,10 @@ #include #include #include +#include #include #include +#include #include @@ -75,6 +77,12 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un const auto & nested_col = checkAndGetColumn(const_column->getDataColumn())->getNestedColumnPtr(); res.insert({ ColumnConst::create(nested_col, col.column->size()), nested_type, col.name}); } + else if (auto * low_cardinality = checkAndGetColumn(*col.column)) + { + const DataTypePtr & low_cardinality_type = static_cast(*col.type).getDictionaryType(); + const auto & low_cardinality_col = low_cardinality->convertToFullColumnIfLowCardinality(); + res.insert({low_cardinality_col, low_cardinality_type, col.name}); + } else throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 488abb7f6a8..dbb9a113244 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -254,7 +254,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( size_t row_size = column->size(); size_t position = header.getPositionByName(key_ast->getColumnName()); const DataTypePtr & index_type = header.getByPosition(position).type; - const auto & converted_column = castColumn(ColumnWithTypeAndName{getPrimitiveColumn(column), getPrimitiveType(type), ""}, index_type, context); + const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context); out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); if 
(function_name == "in" || function_name == "globalIn")

From 6e87eb9fd71b546155156abd9eb4bc0999c7a638 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Thu, 17 Oct 2019 16:47:19 +0300
Subject: [PATCH 036/222] DOCAPI-8163: EN review, RU translation.

---
 .../agg_functions/parametric_functions.md | 6 +-
 .../agg_functions/parametric_functions.md | 157 ++++++++++++++----
 2 files changed, 130 insertions(+), 33 deletions(-)

diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md
index a044f7d97be..db946830c7e 100644
--- a/docs/en/query_language/agg_functions/parametric_functions.md
+++ b/docs/en/query_language/agg_functions/parametric_functions.md
@@ -89,7 +89,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
 
 - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described under these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
@@ -108,7 +108,7 @@ Type: `UInt8`.
 
 - `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern.
 
-- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators.
+- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lie between these events. You can use the `>=`, `>`, `<`, `<=` operators.
 
 **Examples**
 
@@ -178,7 +178,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
 
 - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
**Returned values** diff --git a/docs/ru/query_language/agg_functions/parametric_functions.md b/docs/ru/query_language/agg_functions/parametric_functions.md index b0ece3ced11..a5db4598c3b 100644 --- a/docs/ru/query_language/agg_functions/parametric_functions.md +++ b/docs/ru/query_language/agg_functions/parametric_functions.md @@ -71,51 +71,148 @@ FROM В этом случае необходимо помнить, что границы корзин гистограммы не известны. -## sequenceMatch(pattern)(time, cond1, cond2, ...) +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} -Сопоставление с образцом для цепочки событий. - -`pattern` - строка, содержащая шаблон для сопоставления. Шаблон похож на регулярное выражение. - -`time` - время события, тип DateTime - -`cond1`, `cond2` ... - от одного до 32 аргументов типа UInt8 - признаков, было ли выполнено некоторое условие для события. - -Функция собирает в оперативке последовательность событий. Затем производит проверку на соответствие этой последовательности шаблону. -Возвращает UInt8 - 0, если шаблон не подходит и 1, если шаблон подходит. - -Пример: `sequenceMatch('(?1).*(?2)')(EventTime, URL LIKE '%company%', URL LIKE '%cart%')` - -- была ли цепочка событий, в которой посещение страницы с адресом, содержащим company было раньше по времени посещения страницы с адресом, содержащим cart. - -Это вырожденный пример. Его можно записать с помощью других агрегатных функций: +Проверяет, содержит ли последовательность цепочку событий, которая соответствует шаблону. ```sql -minIf(EventTime, URL LIKE '%company%') < maxIf(EventTime, URL LIKE '%cart%'). +sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -Но в более сложных случаях, такого решения нет. +!!! warning "Предупреждение" + События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции. -Синтаксис шаблонов: -`(?1)` - ссылка на условие (вместо 1 - любой номер); +**Параметры** -`.*` - произвольное количество любых событий; +- `pattern` — строка с шаблоном. Смотрите [Синтаксис шаблонов](#sequence-function-pattern-syntax). -`(?t>=1800)` - условие на время; +- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../data_types/int_uint.md). -за указанное время допускается любое количество любых событий; +- `cond1`, `cond2` — условия, описывающие цепочку событий. Тип данных — `UInt8`. Можно использовать до 32 условий. Функция учитывает только те события, которые указаны в условиях. Функция пропускает данные из последовательности, если они не описаны ни в одном из условий. -вместо `>=` могут использоваться операторы `<`, `>`, `<=`; -вместо 1800 может быть любое число; +**Возвращаемые значения** -События, произошедшие в одну секунду, могут оказаться в цепочке в произвольном порядке. От этого может зависеть результат работы функции. +- 1, если цепочка событий, соответствующая шаблону найдена. +- 0, если цепочка событий, соответствующая шаблону не найдена. -## sequenceCount(pattern)(time, cond1, cond2, ...) +Тип: `UInt8`. -Аналогично функции sequenceMatch, но возвращает не факт наличия цепочки событий, а UInt64 - количество найденных цепочек. -Цепочки ищутся без перекрытия. То есть, следующая цепочка может начаться только после окончания предыдущей. + +**Синтаксис шаблонов** + +- `(?N)` — соответствует условию на позиции `N`. Условия пронумерованы по порядку в диапазоне `[1, 32]`. 
Например, `(?1)` соответствует условию, заданному параметром `cond1`.
+
+- `.*` — соответствует любому количеству событий. Для этого элемента шаблона не надо задавать условия.
+
+- `(?t operator value)` — устанавливает время в секундах, которое должно разделять два события. Например, шаблон `(?1)(?t>1800)(?2)` соответствует событиям, которые произошли более чем через 1800 секунд друг от друга. Между этими событиями может находиться произвольное количество любых событий. Операторы могут быть `>=`, `>`, `<`, `<=`.
+
+**Примеры**
+
+Пусть таблица `t` содержит следующие данные:
+
+```text
+┌─time─┬─number─┐
+│ 1 │ 1 │
+│ 2 │ 3 │
+│ 3 │ 2 │
+└──────┴────────┘
+```
+
+Выполним запрос:
+
+```sql
+SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) FROM t
+```
+```text
+┌─sequenceMatch('(?1)(?2)')(time, equals(number, 1), equals(number, 2))─┐
+│ 1 │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+Функция нашла цепочку событий, в которой число 2 следует за числом 1. Число 3 между ними было пропущено, поскольку оно не описано как событие. Если необходимо учесть это число при поиске цепочки событий, заданной в примере, то необходимо задать для него условие.
+
+```sql
+SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM t
+```
+```text
+┌─sequenceMatch('(?1)(?2)')(time, equals(number, 1), equals(number, 2), equals(number, 3))─┐
+│ 0 │
+└──────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+В этом случае функция не может найти цепочку событий, соответствующую шаблону, поскольку событие для числа 3 произошло между 1 и 2. Если бы в этом же случае мы бы проверяли условие на событие для числа 4, то цепочка бы соответствовала шаблону.
+
+```sql
+SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM t
+```
+```text
+┌─sequenceMatch('(?1)(?2)')(time, equals(number, 1), equals(number, 2), equals(number, 4))─┐
+│ 1 │
+└──────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+
+**Смотрите также**
+
+- [sequenceCount](#function-sequencecount)
+
+
+## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount}
+
+Вычисляет количество цепочек событий, соответствующих шаблону. Функция отыскивает непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий.
+
+!!! warning "Предупреждение"
+    События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции.
+
+```sql
+sequenceCount(pattern)(timestamp, cond1, cond2, ...)
+```
+
+**Параметры**
+
+- `pattern` — строка с шаблоном. Смотрите [Синтаксис шаблонов](#sequence-function-pattern-syntax).
+
+- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../data_types/int_uint.md).
+
+- `cond1`, `cond2` — условия, описывающие цепочку событий. Тип данных — `UInt8`. Можно использовать до 32 условий. Функция учитывает только те события, которые указаны в условиях. Функция пропускает данные из последовательности, если они не описаны ни в одном из условий.
+
+**Возвращаемое значение**
+
+- Число непересекающихся цепочек событий, соответствующих шаблону.
+
+Тип: `UInt64`.
+
+**Пример**
+
+Пусть таблица `t` содержит следующие данные:
+
+```text
+┌─time─┬─number─┐
+│ 1 │ 1 │
+│ 2 │ 3 │
+│ 3 │ 2 │
+│ 4 │ 1 │
+│ 5 │ 3 │
+│ 6 │ 2 │
+└──────┴────────┘
+```
+
+Вычислим, сколько раз число 2 стоит после числа 1, причем между 1 и 2 могут быть любые числа:
+
+```sql
+SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t
+```
+```text
+┌─sequenceCount('(?1).*(?2)')(time, equals(number, 1), equals(number, 2))─┐
+│ 2 │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+- [sequenceMatch](#function-sequencematch)
 
 ## windowFunnel(window)(timestamp, cond1, cond2, cond3, ...)

From 1d423fad7382c2fabd42e45afe96d23169331e29 Mon Sep 17 00:00:00 2001
From: "philip.han"
Date: Fri, 18 Oct 2019 11:55:01 +0900
Subject: [PATCH 037/222] Blocked "nested-array-type" for the bloom_filter

---
 dbms/src/Interpreters/BloomFilter.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp
index 62897b6c774..150ddbb5e0c 100644
--- a/dbms/src/Interpreters/BloomFilter.cpp
+++ b/dbms/src/Interpreters/BloomFilter.cpp
@@ -92,7 +92,12 @@ bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed
 const DataTypePtr getPrimitiveType(const DataTypePtr data_type)
 {
     if (const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get()))
-        return getPrimitiveType(array_type->getNestedType());
+    {
+        if (!typeid_cast<const DataTypeArray *>(array_type->getNestedType().get()))
+            return getPrimitiveType(array_type->getNestedType());
+        else
+            throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
+    }
 
     if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))
         return getPrimitiveType(nullable_type->getNestedType());

From decd5553170cd377a84c5817d4b4e189784018b0 Mon Sep 17 00:00:00 2001
From: hcz
Date: Fri, 18 Oct 2019 15:30:41 +0800
Subject: [PATCH 038/222] Cleanup, keep function names consistent

---
 dbms/src/Functions/array/arraySort.cpp | 2 +-
 dbms/src/Functions/registerFunctionsHigherOrder.cpp | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Functions/array/arraySort.cpp b/dbms/src/Functions/array/arraySort.cpp
index 17a711e8902..35f03cd10ca 100644
--- a/dbms/src/Functions/array/arraySort.cpp
+++ b/dbms/src/Functions/array/arraySort.cpp
@@ -63,7 +63,7 @@ struct NameArrayReverseSort { static constexpr auto name = "arrayReverseSort"; }
 using FunctionArraySort = FunctionArrayMapped<ArraySortImpl<true>, NameArraySort>;
 using FunctionArrayReverseSort = FunctionArrayMapped<ArraySortImpl<false>, NameArrayReverseSort>;
 
-void registerFunctionsArraySort(FunctionFactory & factory)
+void registerFunctionArraySort(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionArraySort>();
     factory.registerFunction<FunctionArrayReverseSort>();
diff --git a/dbms/src/Functions/registerFunctionsHigherOrder.cpp b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
index 2e8b678240b..46e89850582 100644
--- a/dbms/src/Functions/registerFunctionsHigherOrder.cpp
+++ b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
@@ -12,8 +12,7 @@ void registerFunctionArraySum(FunctionFactory &);
 void registerFunctionArrayFirst(FunctionFactory &);
 void registerFunctionArrayFirstIndex(FunctionFactory &);
 void registerFunctionArraySplit(FunctionFactory &);
-void registerFunctionsArraySort(FunctionFactory &);
-void registerFunctionArrayReverseSort(FunctionFactory &);
+void registerFunctionArraySort(FunctionFactory &);
 void registerFunctionArrayCumSum(FunctionFactory &);
 void
registerFunctionArrayCumSumNonNegative(FunctionFactory &); void registerFunctionArrayDifference(FunctionFactory &); @@ -29,7 +28,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) registerFunctionArrayFirst(factory); registerFunctionArrayFirstIndex(factory); registerFunctionArraySplit(factory); - registerFunctionsArraySort(factory); + registerFunctionArraySort(factory); registerFunctionArrayCumSum(factory); registerFunctionArrayCumSumNonNegative(factory); registerFunctionArrayDifference(factory); From 124ea9699acbfdbb99935d7aa0eccc926bb323c8 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 18 Oct 2019 16:45:39 +0800 Subject: [PATCH 039/222] Add arrayFill --- dbms/src/Functions/array/arrayFill.cpp | 127 ++++++++++++++++++ .../registerFunctionsHigherOrder.cpp | 2 + 2 files changed, 129 insertions(+) create mode 100644 dbms/src/Functions/array/arrayFill.cpp diff --git a/dbms/src/Functions/array/arrayFill.cpp b/dbms/src/Functions/array/arrayFill.cpp new file mode 100644 index 00000000000..4c2dc5659b8 --- /dev/null +++ b/dbms/src/Functions/array/arrayFill.cpp @@ -0,0 +1,127 @@ +#include +#include +#include "FunctionArrayMapped.h" +#include + + +namespace DB +{ + +template +struct ArrayFillImpl +{ + static bool needBoolean() { return true; } + static bool needExpression() { return true; } + static bool needOneArray() { return false; } + + static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element) + { + return std::make_shared(array_element); + } + + static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) + { + const ColumnUInt8 * column_fill = typeid_cast(&*mapped); + + const IColumn & in_data = array.getData(); + const IColumn::Offsets & in_offsets = array.getOffsets(); + auto column_data = in_data.cloneEmpty(); + IColumn & out_data = *column_data.get(); + + if (column_fill) + { + const IColumn::Filter & fill = column_fill->getData(); + + size_t array_begin = 0; + size_t array_end = 0; + size_t begin = 0; + size_t end = 0; + + out_data.reserve(in_data.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + array_end = in_offsets[i] - 1; + + for (; end <= array_end; ++end) + { + if (end == array_end || fill[end + 1] != fill[begin]) { + if (fill[begin]) + { + if constexpr (Reverse) + { + if (end == array_end) + out_data.insertManyFrom(in_data, array_end, end + 1 - begin); + else + out_data.insertManyFrom(in_data, end + 1, end + 1 - begin); + } + else + { + if (begin == array_begin) + out_data.insertManyFrom(in_data, array_begin, end + 1 - begin); + else + out_data.insertManyFrom(in_data, begin - 1, end + 1 - begin); + } + } + else + out_data.insertRangeFrom(in_data, begin, end + 1 - begin); + + begin = end + 1; + } + } + + array_begin = array_end + 1; + } + } + else + { + auto column_fill_const = checkAndGetColumnConst(&*mapped); + + if (!column_fill_const) + throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_fill_const->getValue()) + { + size_t array_begin = 0; + size_t array_end = 0; + + out_data.reserve(in_data.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + array_end = in_offsets[i] - 1; + + if constexpr (Reverse) + out_data.insertManyFrom(in_data, array_end, array_end + 1 - array_begin); + else + out_data.insertManyFrom(in_data, array_begin, array_end + 1 - array_begin); + + array_begin = array_end + 1; + } + } + else + return ColumnArray::create( + array.getDataPtr(), + array.getOffsetsPtr() + ); + } + + return ColumnArray::create( + 
std::move(column_data), + array.getOffsetsPtr() + ); + } +}; + +struct NameArrayFill { static constexpr auto name = "arrayFill"; }; +struct NameArrayReverseFill { static constexpr auto name = "arrayReverseFill"; }; +using FunctionArrayFill = FunctionArrayMapped, NameArrayFill>; +using FunctionArrayReverseFill = FunctionArrayMapped, NameArrayReverseFill>; + +void registerFunctionArrayFill(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/registerFunctionsHigherOrder.cpp b/dbms/src/Functions/registerFunctionsHigherOrder.cpp index 46e89850582..8511c0c412c 100644 --- a/dbms/src/Functions/registerFunctionsHigherOrder.cpp +++ b/dbms/src/Functions/registerFunctionsHigherOrder.cpp @@ -11,6 +11,7 @@ void registerFunctionArrayAll(FunctionFactory &); void registerFunctionArraySum(FunctionFactory &); void registerFunctionArrayFirst(FunctionFactory &); void registerFunctionArrayFirstIndex(FunctionFactory &); +void registerFunctionArrayFill(FunctionFactory &); void registerFunctionArraySplit(FunctionFactory &); void registerFunctionArraySort(FunctionFactory &); void registerFunctionArrayCumSum(FunctionFactory &); @@ -27,6 +28,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) registerFunctionArraySum(factory); registerFunctionArrayFirst(factory); registerFunctionArrayFirstIndex(factory); + registerFunctionArrayFill(factory); registerFunctionArraySplit(factory); registerFunctionArraySort(factory); registerFunctionArrayCumSum(factory); From b87fe27cd6ad9f875d3bf5229667310cae56ccd0 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 18 Oct 2019 16:45:56 +0800 Subject: [PATCH 040/222] Add tests --- .../queries/0_stateless/01019_array_fill.reference | 10 ++++++++++ dbms/tests/queries/0_stateless/01019_array_fill.sql | 11 +++++++++++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01019_array_fill.reference create mode 100644 dbms/tests/queries/0_stateless/01019_array_fill.sql diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.reference b/dbms/tests/queries/0_stateless/01019_array_fill.reference new file mode 100644 index 00000000000..97841f800c7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01019_array_fill.reference @@ -0,0 +1,10 @@ +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,1,1,1,1,1,1,1,1,1,1,1] +[16,16,16,16,16,16,16,16,16,16,16,16] +[1,1,1,11,12,13,13,13,13,14,15,16] +[11,11,11,11,12,13,14,14,14,14,15,16] +[1,1,3,11,12,12,12,5,6,14,14,14] +[1,3,3,11,12,5,5,5,6,14,NULL,NULL] +[1,1,3,11,11,11,11,5,6,14,14,14] +[3,3,3,11,5,5,5,5,6,14,16,16] diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.sql b/dbms/tests/queries/0_stateless/01019_array_fill.sql new file mode 100644 index 00000000000..33e064d8cb1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01019_array_fill.sql @@ -0,0 +1,11 @@ +SELECT arrayFill(x -> 0, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayReverseFill(x -> 0, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayFill(x -> 1, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayReverseFill(x -> 1, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); + +SELECT arrayFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayReverseFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT arrayReverseFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT 
arrayFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]);
+SELECT arrayReverseFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]);

From 5f1b14e313d71e78830fd81a791cb7911a565cfa Mon Sep 17 00:00:00 2001
From: "philip.han"
Date: Fri, 18 Oct 2019 17:41:54 +0900
Subject: [PATCH 041/222] Remove a dynamically sized array in BloomFilterHash.h

---
 dbms/src/Interpreters/BloomFilterHash.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h
index 0b458b6a7e9..e2af70b7ad8 100644
--- a/dbms/src/Interpreters/BloomFilterHash.h
+++ b/dbms/src/Interpreters/BloomFilterHash.h
@@ -66,8 +66,8 @@ struct BloomFilterHash
             else
             {
                 const DataTypeFixedString * fixed_string_type = typeid_cast<const DataTypeFixedString *>(data_type);
-                const char value[fixed_string_type->getN()] = { 0, };
-                return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(&value[0], fixed_string_type->getN())), 1);
+                const std::vector<char> value(fixed_string_type->getN(), 0);
+                return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
             }
         }
     }

From 73e7131051d3ef20aebecbb0e39f4760ad18e952 Mon Sep 17 00:00:00 2001
From: achimbab <36371084+achimbab@users.noreply.github.com>
Date: Fri, 18 Oct 2019 22:29:42 +0900
Subject: [PATCH 042/222] Fixed mergetree.md about types and a function for bloom_filter.

---
 docs/en/operations/table_engines/mergetree.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md
index a8d4d62f2d0..b41816996b5 100644
--- a/docs/en/operations/table_engines/mergetree.md
+++ b/docs/en/operations/table_engines/mergetree.md
@@ -306,9 +306,9 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
 
     The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025.
 
-    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`.
+    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`.
 
-    The following functions can use it: [equals](../../query_language/functions/comparison_functions.md), [notEquals](../../query_language/functions/comparison_functions.md), [in](../../query_language/functions/in_functions.md), [notIn](../../query_language/functions/in_functions.md).
+    The following functions can use it: [equals](../../query_language/functions/comparison_functions.md), [notEquals](../../query_language/functions/comparison_functions.md), [in](../../query_language/functions/in_functions.md), [notIn](../../query_language/functions/in_functions.md), [has](../../query_language/functions/array_functions.md).
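    As a rough illustration of the new coverage (the table and column names below are invented for this example; the shape mirrors the `00945_bloom_filter_index` tests added earlier in this series), an `Array` column can carry a `bloom_filter` index that is used by `has`:

```sql
-- Illustrative sketch only: names are invented, not taken from the patch.
CREATE TABLE bf_array_example
(
    order_key UInt64,
    tags Array(LowCardinality(Nullable(String))),
    INDEX tags_idx (tags) TYPE bloom_filter GRANULARITY 1
) ENGINE = MergeTree() ORDER BY order_key;

SELECT count() FROM bf_array_example WHERE has(tags, 'clickhouse');
```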
```sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 From 2be06255b12db66ec020313e194961a2293e0dbf Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 18 Oct 2019 21:31:18 +0800 Subject: [PATCH 043/222] Update arrayFill.cpp --- dbms/src/Functions/array/arrayFill.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/array/arrayFill.cpp b/dbms/src/Functions/array/arrayFill.cpp index 4c2dc5659b8..b35c729010e 100644 --- a/dbms/src/Functions/array/arrayFill.cpp +++ b/dbms/src/Functions/array/arrayFill.cpp @@ -45,7 +45,8 @@ struct ArrayFillImpl for (; end <= array_end; ++end) { - if (end == array_end || fill[end + 1] != fill[begin]) { + if (end == array_end || fill[end + 1] != fill[begin]) + { if (fill[begin]) { if constexpr (Reverse) From 5e32bf1ae7eaa4b22d5913084f91117a273bbbb4 Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Sat, 19 Oct 2019 21:38:45 +0900 Subject: [PATCH 044/222] Removed unchecked type-casting --- dbms/src/Functions/FunctionHelpers.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index 4546b5dd8f0..0ab342b84ba 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -79,9 +79,8 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un } else if (auto * low_cardinality = checkAndGetColumn(*col.column)) { - const DataTypePtr & low_cardinality_type = static_cast(*col.type).getDictionaryType(); const auto & low_cardinality_col = low_cardinality->convertToFullColumnIfLowCardinality(); - res.insert({low_cardinality_col, low_cardinality_type, col.name}); + res.insert({low_cardinality_col, nested_type, col.name}); } else throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); From 79a27ece1a9654bc66596dabefe882be6ce41b71 Mon Sep 17 00:00:00 2001 From: hcz Date: Mon, 21 Oct 2019 11:19:11 +0800 Subject: [PATCH 045/222] Flip the condition column in arrayFill --- dbms/src/Functions/array/arrayFill.cpp | 40 +++++++++---------- .../0_stateless/01019_array_fill.reference | 8 ++-- .../queries/0_stateless/01019_array_fill.sql | 8 ++-- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/dbms/src/Functions/array/arrayFill.cpp b/dbms/src/Functions/array/arrayFill.cpp index 4c2dc5659b8..1b6d72027e0 100644 --- a/dbms/src/Functions/array/arrayFill.cpp +++ b/dbms/src/Functions/array/arrayFill.cpp @@ -47,6 +47,8 @@ struct ArrayFillImpl { if (end == array_end || fill[end + 1] != fill[begin]) { if (fill[begin]) + out_data.insertRangeFrom(in_data, begin, end + 1 - begin); + else { if constexpr (Reverse) { @@ -63,8 +65,6 @@ struct ArrayFillImpl out_data.insertManyFrom(in_data, begin - 1, end + 1 - begin); } } - else - out_data.insertRangeFrom(in_data, begin, end + 1 - begin); begin = end + 1; } @@ -81,29 +81,27 @@ struct ArrayFillImpl throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN); if (column_fill_const->getValue()) - { - size_t array_begin = 0; - size_t array_end = 0; - - out_data.reserve(in_data.size()); - - for (size_t i = 0; i < in_offsets.size(); ++i) - { - array_end = in_offsets[i] - 1; - - if constexpr (Reverse) - out_data.insertManyFrom(in_data, array_end, array_end + 1 - array_begin); - else - out_data.insertManyFrom(in_data, array_begin, array_end + 1 - array_begin); - - array_begin = array_end + 1; - } - } - else return ColumnArray::create( array.getDataPtr(), array.getOffsetsPtr() ); + + size_t array_begin 
= 0; + size_t array_end = 0; + + out_data.reserve(in_data.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + array_end = in_offsets[i] - 1; + + if constexpr (Reverse) + out_data.insertManyFrom(in_data, array_end, array_end + 1 - array_begin); + else + out_data.insertManyFrom(in_data, array_begin, array_end + 1 - array_begin); + + array_begin = array_end + 1; + } } return ColumnArray::create( diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.reference b/dbms/tests/queries/0_stateless/01019_array_fill.reference index 97841f800c7..08982beb62e 100644 --- a/dbms/tests/queries/0_stateless/01019_array_fill.reference +++ b/dbms/tests/queries/0_stateless/01019_array_fill.reference @@ -1,9 +1,9 @@ -[1,2,3,11,12,13,4,5,6,14,15,16] -[1,2,3,11,12,13,4,5,6,14,15,16] [1,1,1,1,1,1,1,1,1,1,1,1] [16,16,16,16,16,16,16,16,16,16,16,16] -[1,1,1,11,12,13,13,13,13,14,15,16] -[11,11,11,11,12,13,14,14,14,14,15,16] +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,2,3,3,3,3,4,5,6,6,6,6] +[1,2,3,4,4,4,4,5,6,16,16,16] [1,1,3,11,12,12,12,5,6,14,14,14] [1,3,3,11,12,5,5,5,6,14,NULL,NULL] [1,1,3,11,11,11,11,5,6,14,14,14] diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.sql b/dbms/tests/queries/0_stateless/01019_array_fill.sql index 33e064d8cb1..af48e8d0be4 100644 --- a/dbms/tests/queries/0_stateless/01019_array_fill.sql +++ b/dbms/tests/queries/0_stateless/01019_array_fill.sql @@ -5,7 +5,7 @@ SELECT arrayReverseFill(x -> 1, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); SELECT arrayFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); SELECT arrayReverseFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); -SELECT arrayFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); -SELECT arrayReverseFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); -SELECT arrayFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]); -SELECT arrayReverseFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]); +SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT arrayFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0]); +SELECT arrayReverseFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0]); From f7d2e1b758703a3dc6bfb0d6e314c0aaff8fbbf1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 20 Oct 2019 12:12:42 +0300 Subject: [PATCH 046/222] Added Pipe class. Updated MergeTreeDataSelectExecutor. 
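A Pipe owns a chain of connected processors with a single open output port, and
TreeExecutor now consumes a Pipe instead of a bare processor list. A minimal
usage sketch follows; SomeSource, SomeTransform and handle() are invented
placeholders, not code added by this patch:

    // Sketch only: SomeSource is any concrete single-output ISource,
    // SomeTransform any 1-in/1-out transform, handle() a placeholder consumer.
    Pipe pipe(std::make_shared<SomeSource>());                  // validated by checkSource()
    pipe.addSimpleTransform(std::make_shared<SomeTransform>()); // connects to the open output
    TreeExecutor stream(std::move(pipe));                       // IBlockInputStream over the tree
    while (Block block = stream.read())
        handle(block);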
--- .../src/Processors/Executors/TreeExecutor.cpp | 14 +-- dbms/src/Processors/Executors/TreeExecutor.h | 10 +- dbms/src/Processors/IProcessor.h | 3 + dbms/src/Processors/ISource.h | 2 + dbms/src/Processors/Pipe.cpp | 87 +++++++++++++ dbms/src/Processors/Pipe.h | 37 ++++++ dbms/src/Storages/IStorage.h | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 117 ++++++------------ dbms/src/Storages/StorageMergeTree.cpp | 1 + .../src/Storages/StorageReplicatedMergeTree.h | 1 + 10 files changed, 185 insertions(+), 91 deletions(-) create mode 100644 dbms/src/Processors/Pipe.cpp create mode 100644 dbms/src/Processors/Pipe.h diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index 94e2dfe5b5a..b53b32455ab 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -79,13 +79,13 @@ void TreeExecutor::init() if (processors.empty()) throw Exception("No processors were passed to TreeExecutor.", ErrorCodes::LOGICAL_ERROR); - root = processors.back().get(); + root = &output_port.getProcessor(); validateTree(processors, root, sources_with_progress); - port = std::make_unique(getHeader(), root); - connect(root->getOutputs().front(), *port); - port->setNeeded(); + input_port = std::make_unique(getHeader(), root); + connect(output_port, *input_port); + input_port->setNeeded(); } void TreeExecutor::execute() @@ -170,11 +170,11 @@ Block TreeExecutor::readImpl() { while (true) { - if (port->isFinished()) + if (input_port->isFinished()) return {}; - if (port->hasData()) - return getHeader().cloneWithColumns(port->pull().detachColumns()); + if (input_port->hasData()) + return getHeader().cloneWithColumns(input_port->pull().detachColumns()); execute(); } diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h index 51fc82200b8..d4817d6c99b 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.h +++ b/dbms/src/Processors/Executors/TreeExecutor.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { @@ -18,7 +18,10 @@ public: /// * processors form a tree /// * all processors are attainable from root /// * there is no other connected processors - explicit TreeExecutor(Processors processors_) : processors(std::move(processors_)) { init(); } + explicit TreeExecutor(Pipe pipe) : output_port(pipe.getPort()), processors(std::move(pipe).detachProcessors()) + { + init(); + } String getName() const override { return root->getName(); } Block getHeader() const override { return root->getOutputs().front().getHeader(); } @@ -35,9 +38,10 @@ protected: Block readImpl() override; private: + OutputPort & output_port; Processors processors; IProcessor * root = nullptr; - std::unique_ptr port; + std::unique_ptr input_port; /// Remember sources that support progress. std::vector sources_with_progress; diff --git a/dbms/src/Processors/IProcessor.h b/dbms/src/Processors/IProcessor.h index 7a9a6fee755..ed59f4e591d 100644 --- a/dbms/src/Processors/IProcessor.h +++ b/dbms/src/Processors/IProcessor.h @@ -226,6 +226,9 @@ public: auto & getInputs() { return inputs; } auto & getOutputs() { return outputs; } + const auto & getInputs() const { return inputs; } + const auto & getOutputs() const { return outputs; } + /// Debug output. 
void dump() const; diff --git a/dbms/src/Processors/ISource.h b/dbms/src/Processors/ISource.h index b1669860192..9be21c3a398 100644 --- a/dbms/src/Processors/ISource.h +++ b/dbms/src/Processors/ISource.h @@ -27,4 +27,6 @@ public: const OutputPort & getPort() const { return output; } }; +using SourcePtr = std::shared_ptr; + } diff --git a/dbms/src/Processors/Pipe.cpp b/dbms/src/Processors/Pipe.cpp new file mode 100644 index 00000000000..4511b468061 --- /dev/null +++ b/dbms/src/Processors/Pipe.cpp @@ -0,0 +1,87 @@ +#include +#include + +namespace DB +{ + +static void checkSingleInput(const IProcessor & transform) +{ + if (transform.getInputs().size() != 1) + throw Exception("Processor for pipe should have single input, " + "but " + transform.getName() + " has " + + toString(transform.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkMultipleInputs(const IProcessor & transform, size_t num_inputs) +{ + if (transform.getInputs().size() != num_inputs) + throw Exception("Processor for pipe should have " + toString(num_inputs) + " inputs, " + "but " + transform.getName() + " has " + + toString(transform.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkSingleOutput(const IProcessor & transform) +{ + if (transform.getOutputs().size() != 1) + throw Exception("Processor for pipe should have single output, " + "but " + transform.getName() + " has " + + toString(transform.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkSimpleTransform(const IProcessor & transform) +{ + checkSingleInput(transform); + checkSingleOutput(transform); +} + +static void checkSource(const IProcessor & source) +{ + if (!source.getInputs().empty()) + throw Exception("Source for pipe shouldn't have any input, but " + source.getName() + " has " + + toString(source.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); + + if (source.getOutputs().empty()) + throw Exception("Source for pipe should have single output, but it doesn't have any", + ErrorCodes::LOGICAL_ERROR); + + if (source.getOutputs().size() != 1) + throw Exception("Source for pipe should have single output, but " + source.getName() + " has " + + toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); +} + + +Pipe::Pipe(ProcessorPtr source) +{ + checkSource(*source); + output_port = &source->getOutputs().front(); + processors.emplace_back(std::move(source)); +} + +Pipe::Pipe(Pipes && pipes, ProcessorPtr transform) +{ + checkSingleOutput(*transform); + checkMultipleInputs(*transform, pipes.size()); + + auto it = transform->getInputs().begin(); + + for (auto & pipe : pipes) + { + connect(*pipe.output_port, *it); + ++it; + + processors.insert(processors.end(), pipe.processors.begin(), pipe.processors.end()); + } + + output_port = &transform->getOutputs().front(); + processors.emplace_back(std::move(transform)); +} + +void Pipe::addSimpleTransform(ProcessorPtr transform) +{ + checkSimpleTransform(*transform); + connect(*output_port, transform->getInputs().front()); + output_port = &transform->getOutputs().front(); + processors.emplace_back(std::move(transform)); +} + +} diff --git a/dbms/src/Processors/Pipe.h b/dbms/src/Processors/Pipe.h new file mode 100644 index 00000000000..55b397c82d6 --- /dev/null +++ b/dbms/src/Processors/Pipe.h @@ -0,0 +1,37 @@ +#include + +namespace DB +{ + +class Pipe; +using Pipes = std::vector; + +/// Pipe is a set of processors which represents the part of pipeline with single output. 
+/// All processors in pipe are connected. All ports are connected except the output one. +class Pipe +{ +public: + explicit Pipe(ProcessorPtr source); + Pipe(Pipes && pipes, ProcessorPtr transform); + + Pipe(const Pipe & other) = delete; + Pipe(Pipe && other) = default; + + Pipe & operator=(const Pipe & other) = delete; + Pipe & operator=(Pipe && other) = default; + + OutputPort & getPort() const { return *output_port; } + const Block & getHeader() const { return output_port->getHeader(); } + + /// Add transform to pipe. It must have single input and single output (is checked). + /// Input will be connected with current output port, output port will be updated. + void addSimpleTransform(ProcessorPtr transform); + + Processors detachProcessors() && { return std::move(processors); } + +private: + Processors processors; + OutputPort * output_port = nullptr; +}; + +} diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index b224f84be97..6958d7be54b 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -45,7 +45,9 @@ class PartitionCommands; class IProcessor; using ProcessorPtr = std::shared_ptr; using Processors = std::vector; -using Pipes = std::vector; + +class Pipe; +using Pipes = std::vector; struct ColumnSize { diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 3be9c2cc0dc..80c8b337536 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -632,34 +632,23 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( if (use_sampling) { for (auto & pipe : res) - { - auto & output = pipe.back()->getOutputs().front(); - pipe.emplace_back(std::make_shared(output.getHeader(), filter_expression, filter_function->getColumnName(), false)); - connect(output, pipe.back()->getInputs().front()); - } + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), filter_expression, filter_function->getColumnName(), false)); } /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. 
if (sample_factor_column_queried) { for (auto & pipe : res) - { - auto & output = pipe.back()->getOutputs().front(); - pipe.emplace_back(std::make_shared>( - output.getHeader(), std::make_shared(), used_sample_factor, "_sample_factor")); - connect(output, pipe.back()->getInputs().front()); - } + pipe.addSimpleTransform(std::make_shared>( + pipe.getHeader(), std::make_shared(), used_sample_factor, "_sample_factor")); } if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) { for (auto & pipe : res) - { - auto & output = pipe.back()->getOutputs().front(); - pipe.emplace_back(std::make_shared( - output.getHeader(), query_info.prewhere_info->remove_columns_actions)); - connect(output, pipe.back()->getInputs().front()); - } + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), query_info.prewhere_info->remove_columns_actions)); } return res; @@ -760,7 +749,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( source->addTotalRowsApprox(total_rows); } - res.push_back({std::move(source)}); + res.emplace_back(std::move(source)); } } else if (sum_marks > 0) @@ -833,7 +822,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - res.push_back({std::move(source_processor)}); + res.emplace_back(std::move(source_processor)); } } @@ -892,10 +881,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( if (sum_marks > max_marks_to_use_cache) use_uncompressed_cache = false; - Pipes pipes; + Pipes res; if (sum_marks == 0) - return pipes; + return res; /// Let's split ranges to avoid reading much data. auto split_ranges = [rows_granularity = data_settings->index_granularity, max_block_size](const auto & ranges, int direction) @@ -949,8 +938,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( { size_t need_marks = min_marks_per_stream; - std::vector streams_per_thread; - Processors pipe; + Pipes pipes; /// Loop over parts. 
/// We will iteratively take part or some subrange of a part from the back @@ -1012,58 +1000,44 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( if (sorting_info->direction == 1) { - pipe.push_back({std::make_shared( + pipes.emplace_back(std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)); } else { - pipe.push_back({std::make_shared( + pipes.emplace_back(std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)); - auto & output = pipe.back()->getOutputs().front(); - auto reverse_processor = std::make_shared(output.getHeader()); - connect(output, reverse_processor->getInputs().front()); - pipe.emplace_back(std::move(reverse_processor)); + pipes.back().addSimpleTransform(std::make_shared(pipes.back().getHeader())); } - - streams_per_thread.emplace_back(&pipe.back()->getOutputs().front()); } - if (streams_per_thread.size() > 1) + if (pipes.size() > 1) { SortDescription sort_description; for (size_t j = 0; j < query_info.sorting_info->prefix_order_descr.size(); ++j) sort_description.emplace_back(data.sorting_key_columns[j], sorting_info->direction, 1); - for (auto & stream : streams_per_thread) - { - pipe.emplace_back(std::make_shared(stream->getHeader(), sorting_key_prefix_expr)); - connect(*stream, pipe.back()->getInputs().front()); - stream = &pipe.back()->getOutputs().front(); - } + for (auto & pipe : pipes) + pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), sorting_key_prefix_expr)); - pipe.push_back(std::make_shared( - streams_per_thread.back()->getHeader(), streams_per_thread.size(), sort_description, max_block_size)); + auto merging_sorted = std::make_shared( + pipes.back().getHeader(), pipes.size(), sort_description, max_block_size); - auto it = streams_per_thread.begin(); - for (auto & input : pipe.back()->getInputs()) - { - connect(**it, input); - ++it; - } + res.emplace_back(std::move(pipes), std::move(merging_sorted)); } - - pipes.push_back(std::move(pipe)); + else + res.emplace_back(std::move(pipes.front())); } - return pipes; + return res; } @@ -1102,7 +1076,6 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( use_uncompressed_cache = false; Pipes pipes; - std::vector to_merge; /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL @@ -1116,13 +1089,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - auto & output = source_processor->getPort(); - auto expression_transform = std::make_shared(output.getHeader(), data.sorting_key_expr); - connect(output, expression_transform->getInputPort()); - - to_merge.emplace_back(&expression_transform->getOutputPort()); - - Processors pipe { 
std::move(source_processor), std::move(expression_transform) }; + Pipe pipe(std::move(source_processor)); + pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), data.sorting_key_expr)); pipes.emplace_back(std::move(pipe)); } @@ -1131,31 +1099,34 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); - Block header = to_merge.at(0)->getHeader(); + Block header = pipes.at(0).getHeader(); for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); auto streams_to_merge = [&]() { - size_t num_streams = to_merge.size(); + size_t num_streams = pipes.size(); BlockInputStreams streams; streams.reserve(num_streams); for (size_t i = 0; i < num_streams; ++i) - streams.emplace_back(std::make_shared(pipes[i])); + streams.emplace_back(std::make_shared(std::move(pipes[i]))); pipes.clear(); return streams; }; - ProcessorPtr merged_processor; BlockInputStreamPtr merged; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - merged_processor = std::make_shared(header, to_merge.size(), sort_description, max_block_size); + { + auto merged_processor = + std::make_shared(header, pipes.size(), sort_description, max_block_size); + pipes.emplace_back(std::move(pipes), std::move(merged_processor)); break; + } case MergeTreeData::MergingParams::Collapsing: merged = std::make_shared( @@ -1186,23 +1157,9 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( } if (merged) - return {{std::make_shared(merged)}}; + pipes.emplace_back(std::make_shared(merged)); - auto it = to_merge.begin(); - for (auto & input : merged_processor->getInputs()) - { - connect(**it, input); - ++it; - } - - Processors result; - result.reserve(2 * pipes.size() + 1); - for (auto & pipe : pipes) - for (auto & processor : pipe) - result.emplace_back(std::move(processor)); - - result.emplace_back(merged_processor); - return {result}; + return pipes; } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index df541810258..243072b6d07 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 90d36c1369e..fc6421b7a1c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -27,6 +27,7 @@ #include #include #include +#include namespace DB From 4ca83a8eb58ebbf1bd091e4d4d817a7ce8035fff Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 21 Oct 2019 18:24:15 +0300 Subject: [PATCH 047/222] Disable processors by default. --- dbms/src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 8f2474982a0..c65b7eb17c6 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -380,7 +380,7 @@ struct Settings : public SettingsCollection \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ - M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \ + M(SettingBool, allow_experimental_low_cardinality_type, false, "Obsolete setting, does nothing. 
Will be removed after 2019-08-13") \ M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13") \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) From 2b334a4adbeb5146e382b75b7b13b38accdc9244 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 21 Oct 2019 19:26:29 +0300 Subject: [PATCH 048/222] Review fixes. --- dbms/src/DataStreams/ExecutionSpeedLimits.cpp | 33 +++++++++-------- dbms/src/DataStreams/ExecutionSpeedLimits.h | 13 +++---- .../Interpreters/InterpreterSelectQuery.cpp | 8 ++--- dbms/src/Processors/Chunk.cpp | 9 +++++ dbms/src/Processors/Chunk.h | 7 ++-- ...r.cpp => TreeExecutorBlockInputStream.cpp} | 36 +++++++++---------- ...cutor.h => TreeExecutorBlockInputStream.h} | 6 ++-- .../Transforms/AddingConstColumnTransform.h | 6 +--- .../Transforms/MergingSortedTransform.cpp | 2 +- dbms/src/Storages/IStorage.cpp | 4 +-- .../MergeTreeBaseSelectProcessor.cpp | 12 +++---- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 6 ++-- .../MergeTree/StorageFromMergeTreeDataPart.h | 4 +-- 13 files changed, 79 insertions(+), 67 deletions(-) rename dbms/src/Processors/Executors/{TreeExecutor.cpp => TreeExecutorBlockInputStream.cpp} (83%) rename dbms/src/Processors/Executors/{TreeExecutor.h => TreeExecutorBlockInputStream.h} (86%) diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.cpp b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp index 8886ca4b2b8..532c693bd47 100644 --- a/dbms/src/DataStreams/ExecutionSpeedLimits.cpp +++ b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp @@ -37,10 +37,13 @@ static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_i } } -void ExecutionSpeedLimits::throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds) +void ExecutionSpeedLimits::throttle( + size_t read_rows, size_t read_bytes, + size_t total_rows_to_read, UInt64 total_elapsed_microseconds) { - if ((min_execution_speed || max_execution_speed || min_execution_speed_bytes || - max_execution_speed_bytes || (total_rows && timeout_before_checking_execution_speed != 0)) && + if ((min_execution_rps != 0 || max_execution_rps != 0 + || min_execution_bps != 0 || max_execution_bps != 0 + || (total_rows_to_read != 0 && timeout_before_checking_execution_speed != 0)) && (static_cast(total_elapsed_microseconds) > timeout_before_checking_execution_speed.totalMicroseconds())) { /// Do not count sleeps in throttlers @@ -52,33 +55,35 @@ void ExecutionSpeedLimits::throttle(size_t read_rows, size_t read_bytes, size_t if (elapsed_seconds > 0) { - if (min_execution_speed && read_rows / elapsed_seconds < min_execution_speed) + auto rows_per_second = read_rows / elapsed_seconds; + if (min_execution_rps && rows_per_second < min_execution_rps) throw Exception("Query is executing too slow: " + toString(read_rows / elapsed_seconds) - + " rows/sec., minimum: " + toString(min_execution_speed), + + " rows/sec., minimum: " + toString(min_execution_rps), ErrorCodes::TOO_SLOW); - if (min_execution_speed_bytes && read_bytes / elapsed_seconds < min_execution_speed_bytes) + auto bytes_per_second = read_bytes / elapsed_seconds; + if (min_execution_bps && bytes_per_second < min_execution_bps) throw Exception("Query is executing too slow: " + toString(read_bytes / elapsed_seconds) - + " bytes/sec., minimum: " + toString(min_execution_speed_bytes), + + " bytes/sec., minimum: " + toString(min_execution_bps), ErrorCodes::TOO_SLOW); /// If the predicted execution time is longer than `max_execution_time`. 
- if (max_execution_time != 0 && total_rows && read_rows) + if (max_execution_time != 0 && total_rows_to_read && read_rows) { - double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows) / read_rows); + double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows_to_read) / read_rows); if (estimated_execution_time_seconds > max_execution_time.totalSeconds()) throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)" + " is too long. Maximum: " + toString(max_execution_time.totalSeconds()) - + ". Estimated rows to process: " + toString(total_rows), + + ". Estimated rows to process: " + toString(total_rows_to_read), ErrorCodes::TOO_SLOW); } - if (max_execution_speed && read_rows / elapsed_seconds >= max_execution_speed) - limitProgressingSpeed(read_rows, max_execution_speed, total_elapsed_microseconds); + if (max_execution_rps && rows_per_second >= max_execution_rps) + limitProgressingSpeed(read_rows, max_execution_rps, total_elapsed_microseconds); - if (max_execution_speed_bytes && read_bytes / elapsed_seconds >= max_execution_speed_bytes) - limitProgressingSpeed(read_bytes, max_execution_speed_bytes, total_elapsed_microseconds); + if (max_execution_bps && bytes_per_second >= max_execution_bps) + limitProgressingSpeed(read_bytes, max_execution_bps, total_elapsed_microseconds); } } } diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.h b/dbms/src/DataStreams/ExecutionSpeedLimits.h index 6dbc2e5c687..a067fc86000 100644 --- a/dbms/src/DataStreams/ExecutionSpeedLimits.h +++ b/dbms/src/DataStreams/ExecutionSpeedLimits.h @@ -7,21 +7,22 @@ namespace DB { /// Limits for query execution speed. -/// In rows per second. class ExecutionSpeedLimits { public: - size_t min_execution_speed = 0; - size_t max_execution_speed = 0; - size_t min_execution_speed_bytes = 0; - size_t max_execution_speed_bytes = 0; + /// For rows per second. + size_t min_execution_rps = 0; + size_t max_execution_rps = 0; + /// For bytes per second. + size_t min_execution_bps = 0; + size_t max_execution_bps = 0; Poco::Timespan max_execution_time = 0; /// Verify that the speed is not too low after the specified time has elapsed. Poco::Timespan timeout_before_checking_execution_speed = 0; /// Pause execution in case if speed limits were exceeded. 
- void throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds); + void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds); }; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index fe41080d033..9c6334981e3 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1568,10 +1568,10 @@ void InterpreterSelectQuery::executeFetchColumns( */ if (options.to_stage == QueryProcessingStage::Complete) { - limits.speed_limits.min_execution_speed = settings.min_execution_speed; - limits.speed_limits.max_execution_speed = settings.max_execution_speed; - limits.speed_limits.min_execution_speed_bytes = settings.min_execution_speed_bytes; - limits.speed_limits.max_execution_speed_bytes = settings.max_execution_speed_bytes; + limits.speed_limits.min_execution_rps = settings.min_execution_speed; + limits.speed_limits.max_execution_rps = settings.max_execution_speed; + limits.speed_limits.min_execution_bps = settings.min_execution_speed_bytes; + limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; } diff --git a/dbms/src/Processors/Chunk.cpp b/dbms/src/Processors/Chunk.cpp index 253eb1e91b1..4be0502f604 100644 --- a/dbms/src/Processors/Chunk.cpp +++ b/dbms/src/Processors/Chunk.cpp @@ -97,6 +97,15 @@ Columns Chunk::detachColumns() return std::move(columns); } +void Chunk::addColumn(ColumnPtr column) +{ + if (column->size() != num_rows) + throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + + toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + + columns.emplace_back(std::move(column)) +} + void Chunk::erase(size_t position) { if (columns.empty()) diff --git a/dbms/src/Processors/Chunk.h b/dbms/src/Processors/Chunk.h index 7e33d8cf1c0..28e9bde56e5 100644 --- a/dbms/src/Processors/Chunk.h +++ b/dbms/src/Processors/Chunk.h @@ -72,11 +72,12 @@ public: UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } - bool hasNoRows() const { return num_rows == 0; } - bool hasNoColumns() const { return columns.empty(); } - bool empty() const { return hasNoRows() && hasNoColumns(); } + bool hasRows() const { return num_rows > 0; } + bool hasColumns() const { return !columns.empty(); } + bool empty() const { return !hasRows() && !hasColumns(); } operator bool() const { return !empty(); } + void addColumn(ColumnPtr column); void erase(size_t position); UInt64 bytes() const; diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp similarity index 83% rename from dbms/src/Processors/Executors/TreeExecutor.cpp rename to dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index b53b32455ab..5d632bdcef5 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -9,7 +9,7 @@ static void checkProcessorHasSingleOutput(IProcessor * processor) { size_t num_outputs = processor->getOutputs().size(); if (num_outputs != 1) - throw Exception("All processors in TreeExecutor must have single output, " + throw Exception("All processors in 
TreeExecutorBlockInputStream must have single output, " "but processor with name " + processor->getName() + " has " + std::to_string(num_outputs), ErrorCodes::LOGICAL_ERROR); } @@ -25,7 +25,7 @@ static void validateTree(const Processors & processors, IProcessor * root, std:: bool is_inserted = index.try_emplace(processor.get(), index.size()).second; if (!is_inserted) - throw Exception("Duplicate processor in TreeExecutor with name " + processor->getName(), + throw Exception("Duplicate processor in TreeExecutorBlockInputStream with name " + processor->getName(), ErrorCodes::LOGICAL_ERROR); } @@ -43,13 +43,13 @@ static void validateTree(const Processors & processors, IProcessor * root, std:: if (it == index.end()) throw Exception("Processor with name " + node->getName() + " " - "was not mentioned in list passed to TreeExecutor, " + "was not mentioned in list passed to TreeExecutorBlockInputStream, " "but was traversed to from other processors.", ErrorCodes::LOGICAL_ERROR); size_t position = it->second; if (is_visited[position]) - throw Exception("Processor with name " + node->getName() + " was visited twice while traverse in TreeExecutor. " + throw Exception("Processor with name " + node->getName() + " was visited twice while traverse in TreeExecutorBlockInputStream. " "Passed processors are not tree.", ErrorCodes::LOGICAL_ERROR); is_visited[position] = true; @@ -71,13 +71,13 @@ static void validateTree(const Processors & processors, IProcessor * root, std:: for (size_t i = 0; i < is_visited.size(); ++i) if (!is_visited[i]) throw Exception("Processor with name " + processors[i]->getName() + - " was not visited by traverse in TreeExecutor.", ErrorCodes::LOGICAL_ERROR); + " was not visited by traverse in TreeExecutorBlockInputStream.", ErrorCodes::LOGICAL_ERROR); } -void TreeExecutor::init() +void TreeExecutorBlockInputStream::init() { if (processors.empty()) - throw Exception("No processors were passed to TreeExecutor.", ErrorCodes::LOGICAL_ERROR); + throw Exception("No processors were passed to TreeExecutorBlockInputStream.", ErrorCodes::LOGICAL_ERROR); root = &output_port.getProcessor(); @@ -88,7 +88,7 @@ void TreeExecutor::init() input_port->setNeeded(); } -void TreeExecutor::execute() +void TreeExecutorBlockInputStream::execute() { std::stack stack; stack.push(root); @@ -120,7 +120,7 @@ void TreeExecutor::execute() if (inputs.empty()) throw Exception("Processors " + node->getName() + " with empty input " - "has returned NeedData in TreeExecutor", ErrorCodes::LOGICAL_ERROR); + "has returned NeedData in TreeExecutorBlockInputStream", ErrorCodes::LOGICAL_ERROR); bool all_finished = true; @@ -135,7 +135,7 @@ void TreeExecutor::execute() } if (all_finished) - throw Exception("Processors " + node->getName() + " has returned NeedData in TreeExecutor, " + throw Exception("Processors " + node->getName() + " has returned NeedData in TreeExecutorBlockInputStream, " "but all it's inputs are finished.", ErrorCodes::LOGICAL_ERROR); break; } @@ -160,13 +160,13 @@ void TreeExecutor::execute() { throw Exception("Processor with name " + node->getName() + " " "returned status " + IProcessor::statusToName(status) + " " - "which is not supported in TreeExecutor.", ErrorCodes::LOGICAL_ERROR); + "which is not supported in TreeExecutorBlockInputStream.", ErrorCodes::LOGICAL_ERROR); } } } } -Block TreeExecutor::readImpl() +Block TreeExecutorBlockInputStream::readImpl() { while (true) { @@ -180,31 +180,31 @@ Block TreeExecutor::readImpl() } } -void TreeExecutor::setProgressCallback(const ProgressCallback & 
callback) +void TreeExecutorBlockInputStream::setProgressCallback(const ProgressCallback & callback) { for (auto & source : sources_with_progress) source->setProgressCallback(callback); } -void TreeExecutor::setProcessListElement(QueryStatus * elem) +void TreeExecutorBlockInputStream::setProcessListElement(QueryStatus * elem) { for (auto & source : sources_with_progress) source->setProcessListElement(elem); } -void TreeExecutor::setLimits(const IBlockInputStream::LocalLimits & limits_) +void TreeExecutorBlockInputStream::setLimits(const IBlockInputStream::LocalLimits & limits_) { for (auto & source : sources_with_progress) source->setLimits(limits_); } -void TreeExecutor::setQuota(QuotaForIntervals & quota_) +void TreeExecutorBlockInputStream::setQuota(QuotaForIntervals & quota_) { for (auto & source : sources_with_progress) source->setQuota(quota_); } -void TreeExecutor::addTotalRowsApprox(size_t value) +void TreeExecutorBlockInputStream::addTotalRowsApprox(size_t value) { /// Add only for one source. if (!sources_with_progress.empty()) diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h similarity index 86% rename from dbms/src/Processors/Executors/TreeExecutor.h rename to dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h index d4817d6c99b..da1d60dd972 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.h +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h @@ -9,8 +9,8 @@ class ISourceWithProgress; /// It's a wrapper from processors tree-shaped pipeline to block input stream. /// Execute all processors in a single thread, by in-order tree traverse. -/// Also, support fro progress and quotas. -class TreeExecutor : public IBlockInputStream +/// Also, support for progress and quotas. +class TreeExecutorBlockInputStream : public IBlockInputStream { public: /// Last processor in list must be a tree root. 
@@ -18,7 +18,7 @@ public: /// * processors form a tree /// * all processors are attainable from root /// * there is no other connected processors - explicit TreeExecutor(Pipe pipe) : output_port(pipe.getPort()), processors(std::move(pipe).detachProcessors()) + explicit TreeExecutorBlockInputStream(Pipe pipe) : output_port(pipe.getPort()), processors(std::move(pipe).detachProcessors()) { init(); } diff --git a/dbms/src/Processors/Transforms/AddingConstColumnTransform.h b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h index aea9ee392b5..26d70d27ca7 100644 --- a/dbms/src/Processors/Transforms/AddingConstColumnTransform.h +++ b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h @@ -19,11 +19,7 @@ protected: void transform(Chunk & chunk) override { auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - - columns.emplace_back(data_type->createColumnConst(num_rows, value)->convertToFullColumnIfConst()); - - chunk.setColumns(std::move(columns), num_rows); + chunk.addColumn(data_type->createColumnConst(num_rows, value)->convertToFullColumnIfConst()); } private: diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp index e37eae82de1..3a9cbe23873 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp @@ -176,7 +176,7 @@ IProcessor::Status MergingSortedTransform::prepare() return Status::NeedData; auto chunk = input.pull(); - if (chunk.hasNoRows()) + if (!chunk.hasRows()) return Status::NeedData; updateCursor(std::move(chunk), next_input_to_read); diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index c271f69090a..7a2960a1335 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -440,7 +440,7 @@ BlockInputStreams IStorage::read( res.reserve(pipes.size()); for (auto & pipe : pipes) - res.emplace_back(std::make_shared(std::move(pipe))); + res.emplace_back(std::make_shared(std::move(pipe))); return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 17c5e4609c7..44a4e939565 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -59,7 +59,7 @@ Chunk MergeTreeBaseSelectProcessor::generate() auto res = readFromPart(); - if (!res.hasNoRows()) + if (res.hasRows()) { injectVirtualColumns(res, task.get(), virt_column_names); return res; @@ -231,7 +231,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virtual_column_name); + callback.template operator()(column, virtual_column_name); } else if (virtual_column_name == "_part_index") { @@ -241,7 +241,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeUInt64().createColumn(); - callback.template insert(column, virtual_column_name); + callback.template operator()(column, virtual_column_name); } else if (virtual_column_name == "_partition_id") { @@ -251,7 +251,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virtual_column_name); + callback.template operator()(column, 
virtual_column_name); } } } @@ -262,7 +262,7 @@ namespace struct InsertIntoBlockCallback { template - void insert(const ColumnPtr & column, const String & name) + void operator()(const ColumnPtr & column, const String & name) { block.insert({column, std::make_shared(), name}); } @@ -273,7 +273,7 @@ namespace struct InsertIntoColumnsCallback { template - void insert(const ColumnPtr & column, const String &) + void operator()(const ColumnPtr & column, const String &) { columns.push_back(column); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 80c8b337536..42648bce692 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -58,7 +58,7 @@ namespace std #include #include #include -#include +#include #include namespace ProfileEvents @@ -1103,7 +1103,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); - auto streams_to_merge = [&]() + auto streams_to_merge = [&pipes]() { size_t num_streams = pipes.size(); @@ -1111,7 +1111,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( streams.reserve(num_streams); for (size_t i = 0; i < num_streams; ++i) - streams.emplace_back(std::make_shared(std::move(pipes[i]))); + streams.emplace_back(std::make_shared(std::move(pipes[i]))); pipes.clear(); return streams; diff --git a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 37a3b931fa8..506a8cc3298 100644 --- a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -6,7 +6,7 @@ #include #include -#include +#include namespace DB @@ -36,7 +36,7 @@ public: streams.reserve(pipes.size()); for (auto & pipe : pipes) - streams.emplace_back(std::make_shared(std::move(pipe))); + streams.emplace_back(std::make_shared(std::move(pipe))); return streams; } From f024b007a2e9d04721121e61028234a0f19545ad Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 21 Oct 2019 20:57:26 +0300 Subject: [PATCH 049/222] default_merge_block_size setting --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 18 +++++++++++------- .../src/Storages/MergeTree/MergeTreeSettings.h | 1 + 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index df3720359d3..344210c348c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -683,42 +683,46 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// If merge is vertical we cannot calculate it bool blocks_are_granules_size = (merge_alg == MergeAlgorithm::Vertical); + UInt64 merge_block_size = data_settings->default_merge_block_size; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: merged_stream = std::make_unique( - src_streams, sort_description, DEFAULT_MERGE_BLOCK_SIZE, 0, rows_sources_write_buf.get(), true, blocks_are_granules_size); + src_streams, sort_description, merge_block_size, 0, rows_sources_write_buf.get(), true, blocks_are_granules_size); break; case MergeTreeData::MergingParams::Collapsing: merged_stream = std::make_unique( - 
src_streams, sort_description, data.merging_params.sign_column, DEFAULT_MERGE_BLOCK_SIZE, rows_sources_write_buf.get(), blocks_are_granules_size); + src_streams, sort_description, data.merging_params.sign_column, + merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; case MergeTreeData::MergingParams::Summing: merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.columns_to_sum, DEFAULT_MERGE_BLOCK_SIZE); + src_streams, sort_description, data.merging_params.columns_to_sum, merge_block_size); break; case MergeTreeData::MergingParams::Aggregating: merged_stream = std::make_unique( - src_streams, sort_description, DEFAULT_MERGE_BLOCK_SIZE); + src_streams, sort_description, merge_block_size); break; case MergeTreeData::MergingParams::Replacing: merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.version_column, DEFAULT_MERGE_BLOCK_SIZE, rows_sources_write_buf.get(), blocks_are_granules_size); + src_streams, sort_description, data.merging_params.version_column, + merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; case MergeTreeData::MergingParams::Graphite: merged_stream = std::make_unique( - src_streams, sort_description, DEFAULT_MERGE_BLOCK_SIZE, + src_streams, sort_description, merge_block_size, data.merging_params.graphite_params, time_of_merge); break; case MergeTreeData::MergingParams::VersionedCollapsing: merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.sign_column, DEFAULT_MERGE_BLOCK_SIZE, rows_sources_write_buf.get(), blocks_are_granules_size); + src_streams, sort_description, data.merging_params.sign_column, + merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 3652718451f..1e796415254 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -29,6 +29,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ \ /** Merge settings. */ \ + M(SettingUInt64, default_merge_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.") \ M(SettingUInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.") \ From e7ba48ee84acb52660a9a44d942738e2e140aca2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 22 Oct 2019 02:49:44 +0300 Subject: [PATCH 050/222] Fix build. 
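The break is a missing semicolon in the new `Chunk::addColumn` introduced by the review-fixes commit; the hunk follows. For context, a minimal sketch of the call pattern that method enables (the constant value and sizes here are illustrative; `AddingConstColumnTransform` is the real in-tree caller):

```cpp
/// Append one more column to an existing chunk. addColumn() verifies that
/// the column height matches getNumRows() and throws LOGICAL_ERROR otherwise.
Columns columns;               /// existing columns, all of height num_rows
UInt64 num_rows = 8192;        /// illustrative
Chunk chunk(std::move(columns), num_rows);

auto extra = DataTypeUInt64().createColumnConst(num_rows, 42u)
                 ->convertToFullColumnIfConst();   /// Chunk stores full columns
chunk.addColumn(std::move(extra));
```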
--- dbms/src/Processors/Chunk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Chunk.cpp b/dbms/src/Processors/Chunk.cpp index 4be0502f604..d9d0574d3b8 100644 --- a/dbms/src/Processors/Chunk.cpp +++ b/dbms/src/Processors/Chunk.cpp @@ -103,7 +103,7 @@ void Chunk::addColumn(ColumnPtr column) throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); - columns.emplace_back(std::move(column)) + columns.emplace_back(std::move(column)); } void Chunk::erase(size_t position) From dad1e397e2ad628be4b9e7d2f8f98281b6196b16 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 22 Oct 2019 02:51:36 +0300 Subject: [PATCH 051/222] Fix build. --- dbms/src/Processors/Transforms/MergingSortedTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp index 3a9cbe23873..d8f06a7fe4a 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp @@ -132,7 +132,7 @@ IProcessor::Status MergingSortedTransform::prepare() } auto chunk = input.pull(); - if (chunk.hasNoRows()) + if (!chunk.hasRows()) { all_inputs_has_data = false; continue; From 645bcdacaed05b511d2cce9ac1135adab5460289 Mon Sep 17 00:00:00 2001 From: memo Date: Tue, 22 Oct 2019 10:23:07 +0800 Subject: [PATCH 052/222] add docs and simplify the code --- dbms/src/Functions/array/arrayCompact.cpp | 38 +------------------ .../functions/array_functions.md | 19 +++++++++- 2 files changed, 20 insertions(+), 37 deletions(-) diff --git a/dbms/src/Functions/array/arrayCompact.cpp b/dbms/src/Functions/array/arrayCompact.cpp index 0775c4cb7bb..bd3d8fa303e 100644 --- a/dbms/src/Functions/array/arrayCompact.cpp +++ b/dbms/src/Functions/array/arrayCompact.cpp @@ -3,7 +3,6 @@ #include "FunctionArrayMapped.h" #include - namespace DB { /// arrayCompact(['a', 'a', 'b', 'b', 'a']) = ['a', 'b', 'a'] - compact arrays @@ -14,6 +13,7 @@ namespace DB struct ArrayCompactImpl { + static bool useDefaultImplementationForConstants() { return true; } static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } @@ -41,40 +41,7 @@ namespace DB const ColumnVector * column = checkAndGetColumn>(&*mapped); if (!column) - { - const ColumnConst * column_const = checkAndGetColumnConst>(&*mapped); - - if (!column_const) - return false; - - const Element x = column_const->template getValue(); - const IColumn::Offsets & offsets = array.getOffsets(); - auto column_data = ColumnVector::create(column_const->size()); - typename ColumnVector::Container & res_values = column_data->getData(); - auto column_offsets = ColumnArray::ColumnOffsets::create(offsets.size()); - IColumn::Offsets & res_offsets = column_offsets->getData(); - - size_t res_pos = 0; - size_t pos = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - if (pos < offsets[i]) - { - res_values[res_pos] = x; - for (++pos, ++res_pos; pos < offsets[i]; ++pos) - { - res_values[res_pos++] = x; - } - } - res_offsets[i] = res_pos; - } - for (size_t i = 0; i < column_data->size() - res_pos; ++i) - { - res_values.pop_back(); - } - res_ptr = ColumnArray::create(std::move(column_data), std::move(column_offsets)); - return true; - } + return false; const IColumn::Offsets & offsets = array.getOffsets(); 
             const typename ColumnVector::Container & data = column->getData();
@@ -138,4 +105,3 @@ namespace DB
 }

 }
-
diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md
index a43f975254f..5da4f939713 100644
--- a/docs/en/query_language/functions/array_functions.md
+++ b/docs/en/query_language/functions/array_functions.md
@@ -789,5 +789,22 @@ SELECT arrayReverse([1, 2, 3])

 Synonym for ["arrayReverse"](#array_functions-arrayreverse)

-[Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/)
+
+## arrayCompact(arr) {#array_functions-arraycompact}
+
+Takes an array and removes consecutive duplicate elements: of each run of equal adjacent elements, only the first one is kept.
+
+Example:
+
+```sql
+SELECT arrayCompact([1, 2, 2, 3, 2, 3, 3])
+```
+
+```text
+┌─arrayCompact([1, 2, 2, 3, 2, 3, 3])─┐
+│ [1,2,3,2,3]                          │
+└─────────────────────────────────────┘
+```
+
+## 
\ No newline at end of file

From 640da3f51268d89d32915a35c22744dd94f26756 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 22 Oct 2019 16:59:13 +0300
Subject: [PATCH 053/222] Try to fix AggregateFunctionGroupBitmap.

---
 dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
index 65a450bfbaf..6479eaf3c1f 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
@@ -90,6 +90,10 @@ public:
     {
         Data & data_lhs = this->data(place);
         const Data & data_rhs = this->data(rhs);
+
+        if (!data_rhs.doneFirst)
+            return;
+
         if (!data_lhs.doneFirst)
         {
             data_lhs.doneFirst = true;

From bcc4c2f0af20fd95529c74ae1bd1af717629ea69 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 22 Oct 2019 17:11:29 +0300
Subject: [PATCH 054/222] Disable processors by default.

---
 dbms/src/Core/Settings.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index 8e21ff830f1..30752113a6b 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -360,7 +360,7 @@ struct Settings : public SettingsCollection
     M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only for 'mysql' table function.") \
     M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
     \
-    M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \
+    M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \
     \
     M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
     M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \
@@ -381,7 +381,7 @@ struct Settings : public SettingsCollection
     \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
-    M(SettingBool, allow_experimental_low_cardinality_type, false, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \
+    M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \
     M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13") \

 DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
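Regarding the `doneFirst` guard in patch 053 above: a right-hand aggregation state that never received a value must contribute nothing to the merge. A condensed, self-contained sketch of the resulting control flow (`State` and `Bitmap` are toy stand-ins for `AggregateFunctionGroupBitmapData`, not the real types):

```cpp
struct Bitmap { void merge(const Bitmap &) { /* toy stand-in */ } };

struct State
{
    bool done_first = false;   /// has this state received at least one value?
    Bitmap bits;
};

void merge(State & lhs, const State & rhs)
{
    if (!rhs.done_first)
        return;                    /// rhs is empty: keep lhs untouched (the new guard)
    if (!lhs.done_first)
    {
        lhs.done_first = true;
        lhs.bits = rhs.bits;       /// lhs was empty: adopt the rhs state
    }
    else
        lhs.bits.merge(rhs.bits);  /// both initialized: combine the bitmaps
}
```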
From 9abab40512e93d6c7f6eb15ba7bf23411f4cdaa7 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 23 Oct 2019 06:45:43 +0300
Subject: [PATCH 055/222] Added more comments.

---
 dbms/src/Processors/Pipe.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dbms/src/Processors/Pipe.h b/dbms/src/Processors/Pipe.h
index 55b397c82d6..872f04c339b 100644
--- a/dbms/src/Processors/Pipe.h
+++ b/dbms/src/Processors/Pipe.h
@@ -11,7 +11,11 @@ using Pipes = std::vector;
 class Pipe
 {
 public:
+    /// Create from source. It must have no input ports and single output.
     explicit Pipe(ProcessorPtr source);
+    /// Connect several pipes together with specified transform.
+    /// Transform must have the number of inputs equals to the number of pipes. And single output.
+    /// Will connect pipes outputs with transform inputs automatically.
     Pipe(Pipes && pipes, ProcessorPtr transform);

     Pipe(const Pipe & other) = delete;

From 081e9d95544a6d264c52f922946823bf51499094 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 23 Oct 2019 12:59:57 +0300
Subject: [PATCH 056/222] Fixed links.

---
 docs/en/operations/system_tables.md | 5 +++++
 docs/en/query_language/alter.md     | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 47bbf0266ac..de0c277f100 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -757,4 +757,9 @@ If there were problems with mutating some parts, the following columns contain a

 **latest_fail_reason** - The exception message that caused the most recent part mutation failure.

+
+## system.disks {#system_tables-disks}
+
+## system.storage_policies {#system_tables-storage_policies}
+
 [Original article](https://clickhouse.yandex/docs/en/operations/system_tables/)
diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md
index 5c1d6331add..b7b37924c71 100644
--- a/docs/en/query_language/alter.md
+++ b/docs/en/query_language/alter.md
@@ -355,6 +355,8 @@ Before downloading, the system checks if the partition exists and the table stru

 Although the query is called `ALTER TABLE`, it does not change the table structure and does not immediately change the data available in the table.

+#### MOVE PARTITION|PART {#alter_move-partition}
+
 #### How To Set Partition Expression {#alter-how-to-specify-part-expr}

 You can specify the partition expression in `ALTER ...
PARTITION` queries in different ways: From 9818eada694ce85ad3837a42e53e284bd08c3c7d Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 24 Oct 2019 02:18:21 +0300 Subject: [PATCH 057/222] rename: merge_max_block_size --- dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 344210c348c..c44aee7e842 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -683,7 +683,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// If merge is vertical we cannot calculate it bool blocks_are_granules_size = (merge_alg == MergeAlgorithm::Vertical); - UInt64 merge_block_size = data_settings->default_merge_block_size; + UInt64 merge_block_size = data_settings->merge_max_block_size; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 1e796415254..6db22063841 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -29,7 +29,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ \ /** Merge settings. */ \ - M(SettingUInt64, default_merge_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.") \ + M(SettingUInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.") \ M(SettingUInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.") \ From 3767cb76de3aaf57d56502e152f53d167fc7c242 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 25 Oct 2019 11:25:02 +0800 Subject: [PATCH 058/222] Improve style --- .../AggregateFunctionMinMaxAny.h | 8 ++-- .../AggregateFunctionQuantile.cpp | 16 ++++---- .../src/AggregateFunctions/ReservoirSampler.h | 2 +- dbms/src/Functions/FunctionBinaryArithmetic.h | 4 +- .../Functions/FunctionsStringSimilarity.cpp | 38 +++++++++---------- dbms/src/Functions/Regexps.h | 8 ++-- dbms/src/Functions/array/arrayFill.cpp | 8 ++-- dbms/src/Functions/array/arraySort.cpp | 2 +- dbms/src/Functions/array/arraySplit.cpp | 12 +++--- dbms/src/Functions/formatString.h | 14 +++---- .../registerFunctionsHigherOrder.cpp | 12 +++--- 11 files changed, 62 insertions(+), 62 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 00869c846d4..db2978db6a0 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -673,15 +673,15 @@ struct AggregateFunctionAnyHeavyData 
: Data }; -template -class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper> +template +class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper> { private: DataTypePtr & type; public: AggregateFunctionsSingleValue(const DataTypePtr & type_) - : IAggregateFunctionDataHelper>({type_}, {}) + : IAggregateFunctionDataHelper>({type_}, {}) , type(this->argument_types[0]) { if (StringRef(Data::name()) == StringRef("min") @@ -722,7 +722,7 @@ public: bool allocatesMemoryInArena() const override { - return AllocatesMemoryInArena; + return use_arena; } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp index 2439120d169..d96bb82d6f5 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -16,11 +16,11 @@ namespace ErrorCodes namespace { -template using FuncQuantile = AggregateFunctionQuantile, NameQuantile, false, std::conditional_t, false>; -template using FuncQuantiles = AggregateFunctionQuantile, NameQuantiles, false, std::conditional_t, true>; +template using FuncQuantile = AggregateFunctionQuantile, NameQuantile, false, std::conditional_t, false>; +template using FuncQuantiles = AggregateFunctionQuantile, NameQuantiles, false, std::conditional_t, true>; -template using FuncQuantileDeterministic = AggregateFunctionQuantile, NameQuantileDeterministic, true, std::conditional_t, false>; -template using FuncQuantilesDeterministic = AggregateFunctionQuantile, NameQuantilesDeterministic, true, std::conditional_t, true>; +template using FuncQuantileDeterministic = AggregateFunctionQuantile, NameQuantileDeterministic, true, std::conditional_t, false>; +template using FuncQuantilesDeterministic = AggregateFunctionQuantile, NameQuantilesDeterministic, true, std::conditional_t, true>; template using FuncQuantileExact = AggregateFunctionQuantile, NameQuantileExact, false, void, false>; template using FuncQuantilesExact = AggregateFunctionQuantile, NameQuantilesExact, false, void, true>; @@ -40,11 +40,11 @@ template using FuncQuantilesTiming = AggregateFunctionQ template using FuncQuantileTimingWeighted = AggregateFunctionQuantile, NameQuantileTimingWeighted, true, Float32, false>; template using FuncQuantilesTimingWeighted = AggregateFunctionQuantile, NameQuantilesTimingWeighted, true, Float32, true>; -template using FuncQuantileTDigest = AggregateFunctionQuantile, NameQuantileTDigest, false, std::conditional_t, false>; -template using FuncQuantilesTDigest = AggregateFunctionQuantile, NameQuantilesTDigest, false, std::conditional_t, true>; +template using FuncQuantileTDigest = AggregateFunctionQuantile, NameQuantileTDigest, false, std::conditional_t, false>; +template using FuncQuantilesTDigest = AggregateFunctionQuantile, NameQuantilesTDigest, false, std::conditional_t, true>; -template using FuncQuantileTDigestWeighted = AggregateFunctionQuantile, NameQuantileTDigestWeighted, true, std::conditional_t, false>; -template using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile, NameQuantilesTDigestWeighted, true, std::conditional_t, true>; +template using FuncQuantileTDigestWeighted = AggregateFunctionQuantile, NameQuantileTDigestWeighted, true, std::conditional_t, false>; +template using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile, NameQuantilesTDigestWeighted, true, std::conditional_t, true>; template