Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-10 01:25:21 +00:00

Merge branch 'master' of github.com:ClickHouse/ClickHouse into sentry

Commit 5b3fef526e
@@ -16,6 +16,7 @@ set (SRCS
    shift10.cpp
    sleep.cpp
    terminalColors.cpp
    errnoToString.cpp
)

if (ENABLE_REPLXX)
@@ -1,9 +1,11 @@
#include <common/ReplxxLineReader.h>
#include <common/errnoToString.h>

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <functional>
#include <sys/file.h>

namespace
{
@@ -17,14 +19,41 @@ void trim(String & s)
}

ReplxxLineReader::ReplxxLineReader(
    const Suggest & suggest, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
    : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_))
    const Suggest & suggest,
    const String & history_file_path_,
    bool multiline_,
    Patterns extenders_,
    Patterns delimiters_,
    replxx::Replxx::highlighter_callback_t highlighter_)
    : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
{
    using namespace std::placeholders;
    using Replxx = replxx::Replxx;

    if (!history_file_path.empty())
        rx.history_load(history_file_path);
    {
        history_file_fd = open(history_file_path.c_str(), O_RDWR);
        if (history_file_fd < 0)
        {
            rx.print("Open of history file failed: %s\n", errnoToString(errno).c_str());
        }
        else
        {
            if (flock(history_file_fd, LOCK_SH))
            {
                rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str());
            }
            else
            {
                rx.history_load(history_file_path);

                if (flock(history_file_fd, LOCK_UN))
                {
                    rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
                }
            }
        }
    }

    auto callback = [&suggest] (const String & context, size_t context_size)
    {
@@ -36,6 +65,9 @@ ReplxxLineReader::ReplxxLineReader(
    rx.set_complete_on_empty(false);
    rx.set_word_break_characters(word_break_characters);

    if (highlighter)
        rx.set_highlighter_callback(highlighter);

    /// By default C-p/C-n are bound to COMPLETE_NEXT/COMPLETE_PREV;
    /// bind C-p/C-n to history-previous/history-next like readline.
    rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
@@ -49,8 +81,8 @@ ReplxxLineReader::ReplxxLineReader(

ReplxxLineReader::~ReplxxLineReader()
{
    if (!history_file_path.empty())
        rx.history_save(history_file_path);
    if (close(history_file_fd))
        rx.print("Close of history file failed: %s\n", strerror(errno));
}

LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
@@ -68,7 +100,20 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)

void ReplxxLineReader::addToHistory(const String & line)
{
    // Lock the history file to prevent inconsistent concurrent changes.
    bool locked = false;
    if (flock(history_file_fd, LOCK_EX))
        rx.print("Lock of history file failed: %s\n", strerror(errno));
    else
        locked = true;

    rx.history_add(line);

    // Flush the changes to disk.
    rx.history_save(history_file_path);

    if (locked && 0 != flock(history_file_fd, LOCK_UN))
        rx.print("Unlock of history file failed: %s\n", strerror(errno));
}

void ReplxxLineReader::enableBracketedPaste()
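The constructor and `addToHistory` above coordinate access to a shared history file with `flock()` advisory locks (`LOCK_SH` while loading, `LOCK_EX` while appending). Here is a minimal standalone sketch of that pattern — not ClickHouse code; the file path is hypothetical and a POSIX system is assumed:

```cpp
#include <fcntl.h>     // open
#include <sys/file.h>  // flock
#include <unistd.h>    // write, close
#include <cstdio>      // perror

int main()
{
    /// Hypothetical file shared by several client processes.
    int fd = open("/tmp/demo_history.txt", O_RDWR | O_CREAT | O_APPEND, 0644);
    if (fd < 0)
    {
        perror("open");
        return 1;
    }

    /// LOCK_EX blocks until no other process holds a lock on the file.
    /// flock() locks are advisory: they only restrain processes that also
    /// take the lock, which is exactly how the code above uses them.
    if (flock(fd, LOCK_EX) == 0)
    {
        const char line[] = "SELECT 1\n";
        write(fd, line, sizeof(line) - 1);  /// Exclusive critical section.
        flock(fd, LOCK_UN);                 /// Let the other clients proceed.
    }
    else
        perror("flock");

    close(fd);
    return 0;
}
```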
@@ -4,10 +4,17 @@

#include <replxx.hxx>


class ReplxxLineReader : public LineReader
{
public:
    ReplxxLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_);
    ReplxxLineReader(
        const Suggest & suggest,
        const String & history_file_path,
        bool multiline,
        Patterns extenders_,
        Patterns delimiters_,
        replxx::Replxx::highlighter_callback_t highlighter_);
    ~ReplxxLineReader() override;

    void enableBracketedPaste() override;
@@ -17,4 +24,8 @@ private:
    void addToHistory(const String & line) override;

    replxx::Replxx rx;
    replxx::Replxx::highlighter_callback_t highlighter;

    // Used to call flock() to synchronize multiple clients using the same history file.
    int history_file_fd = -1;
};
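The new `highlighter_` member is a `replxx::Replxx::highlighter_callback_t`, which replxx invokes with the current input line and a per-character color vector. A toy callback, hedged on my reading of the replxx API — the prompt text and the coloring rule are invented for illustration:

```cpp
#include <replxx.hxx>

#include <string>

/// Color digits red and leave everything else at the default color.
/// replxx calls the highlighter with the input line and a colors vector
/// holding one entry per code point of the input.
static void highlightDigits(const std::string & input, replxx::Replxx::colors_t & colors)
{
    for (size_t i = 0; i < input.size() && i < colors.size(); ++i)
        if (input[i] >= '0' && input[i] <= '9')
            colors[i] = replxx::Replxx::Color::RED;
}

int main()
{
    replxx::Replxx rx;
    rx.set_highlighter_callback(highlightDigits);  /// Same call as in the diff above.

    /// input() returns nullptr on EOF (Ctrl-D).
    while (const char * raw = rx.input("demo> "))
        rx.history_add(raw);

    return 0;
}
```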
base/common/errnoToString.cpp (new file, 29 lines)

@@ -0,0 +1,29 @@
#include "errnoToString.h"

#include <fmt/format.h>


std::string errnoToString(int code, int the_errno)
{
    const size_t buf_size = 128;
    char buf[buf_size];
#ifndef _GNU_SOURCE
    int rc = strerror_r(the_errno, buf, buf_size);
#ifdef __APPLE__
    if (rc != 0 && rc != EINVAL)
#else
    if (rc != 0)
#endif
    {
        std::string tmp = std::to_string(code);
        const char * code_str = tmp.c_str();
        const char * unknown_message = "Unknown error ";
        strcpy(buf, unknown_message);
        strcpy(buf + strlen(unknown_message), code_str);
    }
    return fmt::format("errno: {}, strerror: {}", the_errno, buf);
#else
    (void)code;
    return fmt::format("errno: {}, strerror: {}", the_errno, strerror_r(the_errno, buf, sizeof(buf)));
#endif
}
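The `#ifndef _GNU_SOURCE` split exists because the XSI `strerror_r` returns an `int` status, while the GNU variant returns a `char *` message, so the two branches cannot share code. A minimal caller of the function as declared above — the failing `open()` is just an easy way to get a nonzero `errno`, and the printed text is only indicative:

```cpp
#include <common/errnoToString.h>

#include <fcntl.h>  // open
#include <iostream>

int main()
{
    if (open("/nonexistent/history", O_RDONLY) < 0)
    {
        /// `code` only feeds the "Unknown error <code>" fallback branch;
        /// `the_errno` defaults to the current errno.
        std::cout << errnoToString(0, errno) << '\n';
        /// Expected output, roughly: errno: 2, strerror: No such file or directory
    }
    return 0;
}
```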
base/common/errnoToString.h (new file, 6 lines)

@@ -0,0 +1,6 @@
#pragma once

#include <cerrno>
#include <string>

std::string errnoToString(int code, int the_errno = errno);
@@ -1,6 +1,8 @@
#pragma once

#include <functional>
#include <type_traits>
#include <utility>

template <class T, class Tag>
struct StrongTypedef
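`StrongTypedef` wraps a value type `T` in a distinct type identified by `Tag`, so two wrappers over the same `T` cannot be mixed up. A stripped-down sketch of the idea — not ClickHouse's full implementation, which adds operators and trait machinery:

```cpp
#include <iostream>
#include <utility>

template <class T, class Tag>
struct StrongTypedefSketch
{
    T value;
    explicit StrongTypedefSketch(T v) : value(std::move(v)) {}
};

/// Distinct tag types make otherwise identical wrappers incompatible.
using UserID  = StrongTypedefSketch<unsigned long, struct UserIDTag>;
using QueryID = StrongTypedefSketch<unsigned long, struct QueryIDTag>;

void printUser(const UserID & id)
{
    std::cout << "user " << id.value << '\n';
}

int main()
{
    UserID user(42);
    printUser(user);
    // QueryID query(7);
    // printUser(query);  // Does not compile: QueryID is a different type.
    return 0;
}
```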
@@ -47,6 +47,7 @@ SRCS(
    shift10.cpp
    sleep.cpp
    terminalColors.cpp
    errnoToString.cpp
)

END()
@@ -14,10 +14,12 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt")
   set (USE_INTERNAL_HYPERSCAN_LIBRARY 0)
endif ()

if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
    find_library (HYPERSCAN_LIBRARY hs)
    find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
endif ()
# We cannot use the OS hyperscan library due to a different include path.
#
#if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
#    find_library (HYPERSCAN_LIBRARY hs)
#    find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
#endif ()

if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR)
    set (USE_HYPERSCAN 1)
contrib/replxx (vendored)

@@ -1 +1 @@
Subproject commit f1332626639d6492eaf170758642da14fbbda7bf
Subproject commit 2d37daaad24be71e76514a36b0a47120be2f9086
@@ -18,7 +18,7 @@ ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt
cmake .. -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS
ninja
ninja clickhouse-bundle
mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output
find . -name '*.so' -print -exec mv '{}' /output \;
@@ -82,8 +82,8 @@ RUN apt-get --allow-unauthenticated update -y \
            libcctz-dev \
            libldap2-dev \
            libsasl2-dev \
            heimdal-multidev

            heimdal-multidev \
            libhyperscan-dev


# This symlink is required by gcc to find the lld compiler
@@ -19,6 +19,5 @@
        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
    </metric_log>

    <use_uncompressed_cache>0</use_uncompressed_cache>
    <uncompressed_cache_size>1000000000</uncompressed_cache_size>
</yandex>
@@ -5,6 +5,7 @@
            <query_profiler_cpu_time_period_ns>0</query_profiler_cpu_time_period_ns>
            <allow_introspection_functions>1</allow_introspection_functions>
            <log_queries>1</log_queries>
            <metrics_perf_events_enabled>1</metrics_perf_events_enabled>
        </default>
    </profiles>
</yandex>
@@ -131,5 +131,5 @@ done

dmesg -T > dmesg.log

7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
cp compare.log /output
@@ -100,11 +100,20 @@ for c in connections:

report_stage_end('drop1')

# Apply settings
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
# settings and one of them throws an exception, all previous settings for this
# connection will be reset, because the driver reconnects on error (not
# configurable). So the end result is uncertain, but hopefully we'll be able to
# run at least some queries.
settings = root.findall('settings/*')
for c in connections:
    for s in settings:
        c.execute("set {} = '{}'".format(s.tag, s.text))
        try:
            c.execute("set {} = '{}'".format(s.tag, s.text))
        except:
            print(traceback.format_exc(), file=sys.stderr)

report_stage_end('settings')
@@ -60,7 +60,7 @@ Engines in the family:

- [Distributed](special/distributed.md#distributed)
- [MaterializedView](special/materializedview.md#materializedview)
- [Dictionary](special/dictionary.md#dictionary)
- [Merge](special/merge.md#merge
- [Merge](special/merge.md#merge)
- [File](special/file.md#file)
- [Null](special/null.md#null)
- [Set](special/set.md#set)
@@ -41,8 +41,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
    INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
    INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
ORDER BY expr
[PARTITION BY expr]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
@@ -58,23 +58,27 @@ For a description of parameters, see the [CREATE query description](../../../sql

- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.

- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md).
- `ORDER BY` — The sorting key.

    A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.

    ClickHouse uses the sorting key as the primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause.

    Use the `ORDER BY tuple()` syntax if you don't need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).

- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md). Optional.

    For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.

- `ORDER BY` — The sorting key.

    A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.

- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key).
- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.

    By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause.

- `SAMPLE BY` — An expression for sampling.
- `SAMPLE BY` — An expression for sampling. Optional.

    If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.

- `TTL` — A list of rules specifying the storage duration of rows and defining the logic of automatic part movement [between disks and volumes](#table_engine-mergetree-multiple-volumes).
- `TTL` — A list of rules specifying the storage duration of rows and defining the logic of automatic part movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.

    The expression must have one `Date` or `DateTime` column as a result. Example:
    `TTL date + INTERVAL 1 DAY`
@@ -83,7 +87,7 @@ For a description of parameters, see the [CREATE query description](../../../sql

    For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)

- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`:
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree` (optional):

    - `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage).
    - `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by the number of rows, set it to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
@@ -198,6 +202,10 @@ The number of columns in the primary key is not explicitly limited. Depending on

A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.

You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of insertion. If you want to preserve the data order when inserting data with `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).

To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.

### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}

It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
@@ -22,7 +22,7 @@ Strings are compared by bytes. A shorter string is smaller than all strings that

## equals, a = b and a == b operator {#function-equals}

## notEquals, a ! operator= b and a \<\> b {#function-notequals}
## notEquals, a != b and a \<\> b operator {#function-notequals}

## less, \< operator {#function-less}
@@ -5,10 +5,13 @@ toc_title: SYSTEM

# SYSTEM Queries {#query-language-system}

- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
@@ -18,7 +21,25 @@ toc_title: SYSTEM
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)

## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
Reloads all [internal dictionaries](../dictionaries/internal-dicts.md).
By default, internal dictionaries are disabled.
Always returns `Ok.` regardless of the result of the internal dictionary update.

## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}

Reloads all dictionaries that have been successfully loaded before.
@@ -45,6 +66,16 @@ For more convenient (automatic) cache management, see disable_internal_dns_cache

Resets the mark cache. Used in development of ClickHouse and performance tests.

## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache}

Resets the uncompressed data cache. Used in development of ClickHouse and performance tests.
To manage the uncompressed data cache, use the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) and the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache).

## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Resets the compiled expression cache. Used in development of ClickHouse and performance tests.
The compiled expression cache is used when the query/user/profile-level option [compile](../../operations/settings/settings.md#compile) is enabled.

## FLUSH LOGS {#query_language-system-flush_logs}

Flushes buffered log messages to system tables (e.g. system.query_log). Allows you to avoid waiting 7.5 seconds when debugging.
@@ -89,6 +120,10 @@ Enables background data distribution when inserting data into distributed tables
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
```

## Managing MergeTree Tables {#query-language-system-mergetree}

ClickHouse can manage background processes in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.

### STOP MERGES {#query_language-system-stop-merges}

Provides the possibility to stop background merges for tables in the MergeTree family:
@@ -108,4 +143,110 @@ Provides possibility to start background merges for tables in the MergeTree family
SYSTEM START MERGES [[db.]merge_tree_family_table_name]
```

### STOP TTL MERGES {#query_language-stop-ttl-merges}

Provides the possibility to stop background deletion of old data according to the [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family.
Returns `Ok.` even if the table doesn't exist or doesn't have a MergeTree engine. Returns an error when the database doesn't exist:

``` sql
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
```

### START TTL MERGES {#query_language-start-ttl-merges}

Provides the possibility to start background deletion of old data according to the [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family.
Returns `Ok.` even if the table doesn't exist. Returns an error when the database doesn't exist:

``` sql
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
```

### STOP MOVES {#query_language-stop-moves}

Provides the possibility to stop background moves of data according to the [table TTL expression with a TO VOLUME or TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
Returns `Ok.` even if the table doesn't exist. Returns an error when the database doesn't exist:

``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```

### START MOVES {#query_language-start-moves}

Provides the possibility to start background moves of data according to the [table TTL expression with a TO VOLUME or TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
Returns `Ok.` even if the table doesn't exist. Returns an error when the database doesn't exist:

``` sql
SYSTEM START MOVES [[db.]merge_tree_family_table_name]
```

## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}

ClickHouse can manage background replication-related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables.

### STOP FETCHES {#query_language-system-stop-fetches}
Provides the possibility to stop background fetches of inserted parts for tables in the `ReplicatedMergeTree` family.
Always returns `Ok.` regardless of the table engine, even if the table or database doesn't exist.

``` sql
SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
```

### START FETCHES {#query_language-system-start-fetches}
Provides the possibility to start background fetches of inserted parts for tables in the `ReplicatedMergeTree` family.
Always returns `Ok.` regardless of the table engine, even if the table or database doesn't exist.

``` sql
SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
```

### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
Provides the possibility to stop background sends of newly inserted parts to the other replicas in the cluster for tables in the `ReplicatedMergeTree` family:

``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
```

### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
Provides the possibility to start background sends of newly inserted parts to the other replicas in the cluster for tables in the `ReplicatedMergeTree` family:

``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
```

### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
Provides the possibility to stop background fetch tasks from the replication queues stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible background task types: merges, fetches, mutations, and DDL statements with an ON CLUSTER clause:

``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
```

### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
Provides the possibility to start background fetch tasks from the replication queues stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible background task types: merges, fetches, mutations, and DDL statements with an ON CLUSTER clause:

``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
```

### SYNC REPLICA {#query_language-system-sync-replica}
Waits until a `ReplicatedMergeTree` table is synced with the other replicas in a cluster. Will run until `receive_timeout` if fetches are currently disabled for the table.

``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
```

### RESTART REPLICA {#query_language-system-restart-replica}
Provides the possibility to reinitialize the Zookeeper session state for a `ReplicatedMergeTree` table; it compares the current state with Zookeeper as the source of truth and adds tasks to the Zookeeper queue if needed.
Initialization of the replication queue based on ZooKeeper data happens in the same way as for an `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations.

``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
```

### RESTART REPLICAS {#query_language-system-restart-replicas}
Provides the possibility to reinitialize the Zookeeper session state for all `ReplicatedMergeTree` tables; it compares the current state with Zookeeper as the source of truth and adds tasks to the Zookeeper queue if needed.

``` sql
SYSTEM RESTART REPLICAS
```

[Original article](https://clickhouse.tech/docs/en/query_language/system/) <!--hide-->
@@ -1,9 +1,12 @@
# SYSTEM Queries {#query-language-system}

- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
@@ -13,7 +16,25 @@
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)

## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
Reloads all [internal dictionaries](../dictionaries/internal-dicts.md).
By default, internal dictionaries are disabled.
Always returns `Ok.` regardless of the result of updating the internal dictionaries.

## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}

Reloads all dictionaries that were previously loaded successfully.
@@ -40,6 +61,16 @@ SELECT name, status FROM system.dictionaries;

Resets the mark cache. Used in development of ClickHouse and in performance tests.

## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache}

Resets the uncompressed data cache. Used in development of ClickHouse and in performance tests.
To manage the uncompressed data cache, use the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) and the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache).

## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Resets the compiled expression cache. Used in development of ClickHouse and in performance tests.
Compiled expressions are used when the query/user/profile-level setting [compile](../../operations/settings/settings.md#compile) is enabled.

## FLUSH LOGS {#query_language-system-flush_logs}

Writes log buffers to system tables (e.g. system.query_log). Allows you to avoid waiting 7.5 seconds when debugging.
@@ -84,6 +115,10 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
```

## Managing MergeTree Tables {#query-language-system-mergetree}

ClickHouse can manage background processes in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.

### STOP MERGES {#query_language-system-stop-merges}

Allows stopping background merges for tables in the MergeTree family:
@@ -103,4 +138,110 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
SYSTEM START MERGES [[db.]merge_tree_family_table_name]
```

### STOP TTL MERGES {#query_language-stop-ttl-merges}

Allows stopping background deletion of old data based on [TTL expressions](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family.
Returns `Ok.` even if a non-existent table is specified or the table has an engine other than MergeTree. Returns an error when a non-existent database is specified:

``` sql
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
```

### START TTL MERGES {#query_language-start-ttl-merges}

Starts background deletion of old data based on [TTL expressions](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family.
Returns `Ok.` even if a non-existent table is specified or the table has an engine other than MergeTree. Returns an error when a non-existent database is specified:

``` sql
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
```

### STOP MOVES {#query_language-stop-moves}

Allows stopping background moves of data based on [table TTL expressions with TO VOLUME or TO DISK clauses](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
Returns `Ok.` even if a non-existent table is specified or the table has an engine other than MergeTree. Returns an error when a non-existent database is specified:

``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```

### START MOVES {#query_language-start-moves}

Starts background moves of data based on [table TTL expressions with TO VOLUME or TO DISK clauses](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
Returns `Ok.` even if a non-existent table is specified or the table has an engine other than MergeTree. Returns an error when a non-existent database is specified:

``` sql
SYSTEM START MOVES [[db.]merge_tree_family_table_name]
```

## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}

ClickHouse can manage background replication-related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables.

### STOP FETCHES {#query_language-system-stop-fetches}
Allows stopping background fetches of newly inserted data parts from the other replicas in the cluster for tables in the `ReplicatedMergeTree` family.
Always returns `Ok.` regardless of the table engine, even if the table or database does not exist.

``` sql
SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
```

### START FETCHES {#query_language-system-start-fetches}
Allows starting background fetches of newly inserted data parts from the other replicas in the cluster for tables in the `ReplicatedMergeTree` family.
Always returns `Ok.` regardless of the table engine, even if the table or database does not exist.

``` sql
SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
```

### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
Allows stopping background sends of newly inserted data parts to the other replicas in the cluster for tables in the `ReplicatedMergeTree` family:

``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
```

### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
Allows starting background sends of newly inserted data parts to the other replicas in the cluster for tables in the `ReplicatedMergeTree` family:

``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
```

### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
Stops background processing of tasks from the replication queue stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible task types: merges, fetches, mutations, and DDL statements with ON CLUSTER:

``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
```

### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
Starts background processing of tasks from the replication queue stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible task types: merges, fetches, mutations, and DDL statements with ON CLUSTER:

``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
```

### SYNC REPLICA {#query_language-system-sync-replica}
Waits until a `ReplicatedMergeTree` table is synced with the other replicas in the cluster; runs until `receive_timeout` is reached if syncing is currently disabled for the table:

``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
```

### RESTART REPLICA {#query_language-system-restart-replica}
Reinitializes the Zookeeper session state for a `ReplicatedMergeTree` table; compares the current state with Zookeeper as the source of truth and adds tasks to the Zookeeper queue if needed.
The replication queue is initialized based on ZooKeeper data in the same way as for an `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations.

``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
```

### RESTART REPLICAS {#query_language-system-restart-replicas}
Reinitializes the Zookeeper session state for all `ReplicatedMergeTree` tables; compares the current state with Zookeeper as the source of truth and adds tasks to the Zookeeper queue if needed.

``` sql
SYSTEM RESTART REPLICAS
```

[Original article](https://clickhouse.tech/docs/ru/query_language/system/) <!--hide-->
@@ -2,7 +2,7 @@ Babel==2.8.0
backports-abc==0.5
backports.functools-lru-cache==1.6.1
beautifulsoup4==4.9.1
certifi==2020.4.5.1
certifi==2020.4.5.2
chardet==3.0.4
click==7.1.2
closure==20191111
@@ -13,7 +13,7 @@ idna==2.9
Jinja2==2.11.2
jinja2-highlight==0.6.1
jsmin==2.2.2
livereload==2.6.1
livereload==2.6.2
Markdown==3.2.1
MarkupSafe==1.1.1
mkdocs==1.1.2
@@ -1,5 +1,5 @@
Babel==2.8.0
certifi==2020.4.5.1
certifi==2020.4.5.2
chardet==3.0.4
googletrans==2.4.0
idna==2.9
docs/zh/sql-reference/data-types/lowcardinality.md (new file, 59 lines)

@@ -0,0 +1,59 @@
---
toc_priority: 51
toc_title: LowCardinality Type
---

# LowCardinality Type {#lowcardinality-data-type}

Changes the internal representation of other data types to be dictionary-encoded.

## Syntax {#lowcardinality-syntax}

```sql
LowCardinality(data_type)
```

**Parameters**

- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md), and numeric types, except [Decimal](decimal.md). `LowCardinality` is not efficient for some data types; see the description of the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting.

## Description {#lowcardinality-dscr}

`LowCardinality` changes the data storage method and data processing rules. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality` columns. For many applications, operating on dictionary-encoded data significantly increases the speed of [SELECT](../statements/select/index.md) queries.

The efficiency of the `LowCardinality` data type depends on data diversity. If a dictionary contains fewer than 10,000 distinct values, ClickHouse stores and processes the data more efficiently. If a dictionary contains more than 10,000 distinct values, the efficiency can be worse.

Consider using `LowCardinality` instead of [Enum](enum.md) when working with strings. `LowCardinality` is usually more flexible and efficient.

## Example

Create a table with a `LowCardinality` column:

```sql
CREATE TABLE lc_t
(
    `id` UInt16,
    `strings` LowCardinality(String)
)
ENGINE = MergeTree()
ORDER BY id
```

## Related Settings and Functions

Settings:

- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size)
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)

Functions:

- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality)

## See Also

- [Efficient low-cardinality types](https://www.altinity.com/blog/2019/3/27/low-cardinality).
- [Reducing ClickHouse storage cost with the LowCardinality type — lessons from an Instana engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
- [String optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).
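As a rough illustration of the dictionary coding described above — each distinct value stored once, each row reduced to a small index — here is a toy encoder, not ClickHouse's actual column implementation:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

/// Toy dictionary-encoded string column: the dictionary holds each distinct
/// value once; `rows` stores one small index per row.
struct DictColumn
{
    std::vector<std::string> dictionary;
    std::unordered_map<std::string, uint32_t> positions;
    std::vector<uint32_t> rows;

    void append(const std::string & value)
    {
        auto [it, inserted] = positions.try_emplace(value, uint32_t(dictionary.size()));
        if (inserted)
            dictionary.push_back(value);
        rows.push_back(it->second);
    }
};

int main()
{
    DictColumn col;
    for (const char * v : {"moscow", "berlin", "moscow", "moscow", "berlin"})
        col.append(v);

    std::cout << "distinct values: " << col.dictionary.size()  // 2
              << ", rows: " << col.rows.size() << '\n';        // 5
    return 0;
}
```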
@@ -1,3 +1,8 @@
---
toc_priority: 35
toc_title: Arithmetic Functions
---

# Arithmetic Functions {#suan-zhu-han-shu}

For all arithmetic functions, the result type is the smallest numeric type that the result fits into, if such a type exists. Minimality is determined simultaneously by the number of bits, signedness, and whether the type is floating-point. If there are not enough bits, the type with the most bits is used.
@@ -1,3 +1,8 @@
---
toc_priority: 36
toc_title: Comparison Functions
---

# Comparison Functions {#bi-jiao-han-shu}

Comparison functions always return 0 or 1 (UInt8).
@@ -15,18 +20,16 @@

Strings are compared byte by byte. A shorter string is smaller than any string that starts with it and contains at least one more character.

Note: until version 1.1.54134, signed and unsigned numbers were compared the same way as in C++, so you could get an incorrect result in cases like SELECT 9223372036854775807 > -1. This behavior changed in version 1.1.54134 and is now mathematically correct.
## equals, a = b and a == b operator {#equals-a-b-and-a-b-operator}

## notEquals, a != b and a <> b operator {#notequals-a-operator-b-and-a-b}

## notEquals, a! operator =b and a `<>` b {#notequals-a-operator-b-and-a-b}
## less, < operator {#less-operator}

## less, `< operator` {#less-operator}
## greater, > operator {#greater-operator}

## greater, `> operator` {#greater-operator}
## lessOrEquals, <= operator {#lessorequals-operator}

## lessOrEquals, `<= operator` {#lessorequals-operator}

## greaterOrEquals, `>= operator` {#greaterorequals-operator}
## greaterOrEquals, >= operator {#greaterorequals-operator}

[Source article](https://clickhouse.tech/docs/en/query_language/functions/comparison_functions/) <!--hide-->
@@ -1,3 +1,9 @@
---
toc_folder_title: Functions
toc_priority: 32
toc_title: Introduction
---

# Functions {#han-shu}

There are at least two types of functions in ClickHouse: regular functions (simply called "functions") and aggregate functions. A regular function works as if it is applied to each row separately (for each row, the result of the function does not depend on the other rows). Aggregate functions accumulate a set of values from various rows (i.e. their result depends on the whole result set).
@@ -1,15 +1,20 @@
---
toc_priority: 37
toc_title: Logical Functions
---

# Logical Functions {#luo-ji-han-shu}

Logical functions accept any numeric types as arguments and return 0 or 1 as UInt8.

Zero as an argument is considered "false", while any other non-zero value is considered "true".

## and, and operator {#and-and-operator}
## and, `AND` operator {#and-and-operator}

## or, or operator {#or-or-operator}
## or, `OR` operator {#or-or-operator}

## not, not operator {#not-not-operator}
## not, `NOT` operator {#not-not-operator}

## xor {#xor}
## xor, `XOR` operator {#xor}

[Source article](https://clickhouse.tech/docs/en/query_language/functions/logical_functions/) <!--hide-->
@@ -1,16 +1,230 @@
---
toc_priority: 38
toc_title: Type Conversion Functions
---

# Type Conversion Functions {#lei-xing-zhuan-huan-han-shu}

## toUInt8, toUInt16, toUInt32, toUInt64 {#touint8-touint16-touint32-touint64}
## Common Issues with Numeric Conversions {#numeric-conversion-issues}

## toInt8, toInt16, toInt32, toInt64 {#toint8-toint16-toint32-toint64}
When you convert a value from one data type to another, keep in mind that the conversion is unsafe and can lose data. Data loss usually occurs when you convert a larger data type to a smaller one, or when you convert between different data types.

## toFloat32, toFloat64 {#tofloat32-tofloat64}
ClickHouse has the same [type conversion behavior as C++](https://en.cppreference.com/w/cpp/language/implicit_conversion).
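To make the C++ comparison concrete, here is a small standalone illustration (my own example, not part of the diff); the wrap-around value matches the `toUInt32(-32)` example later on this page:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    int32_t s = -32;

    /// Conversion to unsigned wraps modulo 2^32, as in C++ and ClickHouse:
    uint32_t u = s;
    std::cout << u << '\n';  // 4294967264

    /// Float-to-int conversion rounds towards zero (truncates):
    std::cout << static_cast<int>(8.8) << '\n';  // 8

    /// Narrowing keeps only the low bits (on two's-complement targets):
    std::cout << int(static_cast<int8_t>(300)) << '\n';  // 44
    return 0;
}
```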
## toDate, toDateTime {#todate-todatetime}
## toInt(8\|16\|32\|64) {#touint8-touint16-touint32-touint64}

## toUInt8OrZero, toUInt16OrZero, toUInt32OrZero, toUInt64OrZero, toInt8OrZero, toInt16OrZero, toInt32OrZero, toInt64OrZero, toFloat32OrZero, toFloat64OrZero, toDateOrZero, toDateTimeOrZero {#touint8orzero-touint16orzero-touint32orzero-touint64orzero-toint8orzero-toint16orzero-toint32orzero-toint64orzero-tofloat32orzero-tofloat64orzero-todateorzero-todatetimeorzero}
Converts an input value to an [Int](../../sql-reference/data-types/int-uint.md) data type. This family of functions includes:

- `toInt8(expr)` — Results in the `Int8` data type.
- `toInt16(expr)` — Results in the `Int16` data type.
- `toInt32(expr)` — Results in the `Int32` data type.
- `toInt64(expr)` — Results in the `Int64` data type.

**Parameters**

- `expr` — An [expression](../syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported; leading zeros are stripped.

**Returned value**

An integer value of the `Int8`, `Int16`, `Int32`, or `Int64` data type.

These functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), which means they truncate the fractional part of the number.

The behavior of the conversion for [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. Keep the [common issues with numeric conversions](#numeric-conversion-issues) in mind when using these functions.

**Example**

``` sql
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
```

``` text
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
│ -9223372036854775808 │          32 │            16 │           8 │
└──────────────────────┴─────────────┴───────────────┴─────────────┘
```

## toInt(8\|16\|32\|64)OrZero {#toint8163264orzero}

Takes an argument of type String and tries to parse it into `Int (8 | 16 | 32 | 64)`. If parsing fails, returns 0.

**Example**

``` sql
select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
```

``` text
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
│                  123123 │                         0 │
└─────────────────────────┴───────────────────────────┘
```

## toInt(8\|16\|32\|64)OrNull {#toint8163264ornull}

Takes an argument of type String and tries to parse it into `Int (8 | 16 | 32 | 64)`. If parsing fails, returns `NULL`.

**Example**

``` sql
select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
```

``` text
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
│                  123123 │                      ᴺᵁᴸᴸ │
└─────────────────────────┴───────────────────────────┘
```
## toUInt(8\|16\|32\|64) {#touint8163264}

Converts an input value to a [UInt](../../sql-reference/data-types/int-uint.md) data type. This family of functions includes:

- `toUInt8(expr)` — Results in the `UInt8` data type.
- `toUInt16(expr)` — Results in the `UInt16` data type.
- `toUInt32(expr)` — Results in the `UInt32` data type.
- `toUInt64(expr)` — Results in the `UInt64` data type.

**Parameters**

- `expr` — An [expression](../syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported; leading zeros are stripped.

**Returned value**

An integer value of the `UInt8`, `UInt16`, `UInt32`, or `UInt64` data type.

These functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), which means they truncate the fractional part of the number.

The behavior of the conversion for negative arguments and for [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Keep the [common issues with numeric conversions](#numeric-conversion-issues) in mind when using these functions.

**Example**

``` sql
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
```

``` text
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
│ 9223372036854775808 │    4294967264 │             16 │            8 │
└─────────────────────┴───────────────┴────────────────┴──────────────┘
```

## toUInt(8\|16\|32\|64)OrZero {#touint8163264orzero}

## toUInt(8\|16\|32\|64)OrNull {#touint8163264ornull}

## toFloat(32\|64) {#tofloat3264}

## toFloat(32\|64)OrZero {#tofloat3264orzero}

## toFloat(32\|64)OrNull {#tofloat3264ornull}

## toDate {#todate}

## toDateOrZero {#todateorzero}

## toDateOrNull {#todateornull}

## toDateTime {#todatetime}

## toDateTimeOrZero {#todatetimeorzero}

## toDateTimeOrNull {#todatetimeornull}

## toDecimal(32\|64\|128) {#todecimal3264128}
Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` parameter specifies the number of decimal places.

- `toDecimal32(value, S)`
- `toDecimal64(value, S)`
- `toDecimal128(value, S)`

## toDecimal(32\|64\|128)OrNull {#todecimal3264128ornull}

Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data-types/decimal.md) data type value. This family of functions includes:

- `toDecimal32OrNull(expr, S)` — Results in the `Nullable(Decimal32(S))` data type.
- `toDecimal64OrNull(expr, S)` — Results in the `Nullable(Decimal64(S))` data type.
- `toDecimal128OrNull(expr, S)` — Results in the `Nullable(Decimal128(S))` data type.

Use these functions if you prefer to get a `NULL` value instead of an exception when an input value parsing error occurs.

**Parameters**

- `expr` — An [expression](../syntax.md#syntax-expressions) returning a value of the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the text representation of a decimal number, for example `'1.111'`.
- `S` — Scale, the number of decimal places.

**Returned value**

A value of the `Nullable(Decimal(P,S))` data type. The value contains:

- A number with `S` decimal places, if ClickHouse interprets the input string as a number.
- `NULL`, if parsing fails or the input number contains more than `S` decimal places.

**Examples**

``` sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val)
```

``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5))                            │
└──────────┴────────────────────────────────────────────────────┘
```

``` sql
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
```

``` text
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2))                            │
└──────┴────────────────────────────────────────────────────┘
```

## toDecimal(32\|64\|128)OrZero {#todecimal3264128orzero}

Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type. This family of functions includes:

- `toDecimal32OrZero( expr, S)` — Results in the `Decimal32(S)` data type.
- `toDecimal64OrZero( expr, S)` — Results in the `Decimal64(S)` data type.
- `toDecimal128OrZero( expr, S)` — Results in the `Decimal128(S)` data type.

Use these functions if you prefer to get a `0` value instead of an exception when an input value parsing error occurs.

**Parameters**

- `expr` — An [expression](../syntax.md#syntax-expressions) returning a value of the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the text representation of a decimal number, for example `'1.111'`.
- `S` — Scale, the number of decimal places.

**Returned value**

A value of the `Decimal(P,S)` data type. The value contains:

- A number with `S` decimal places, if ClickHouse interprets the input string as a number.
- `0` with `S` decimal places, if parsing fails or the input number contains more than `S` decimal places.

**Examples**

``` sql
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val)
```

``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5)                                      │
└──────────┴────────────────────────────────────────────────────┘
```

``` sql
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val)
```

``` text
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
│ 0.00 │ Decimal(9, 2)                                      │
└──────┴────────────────────────────────────────────────────┘
```
## toUInt8OrNull, toUInt16OrNull, toUInt32OrNull, toUInt64OrNull, toInt8OrNull, toInt16OrNull, toInt32OrNull, toInt64OrNull, toFloat32OrNull, toFloat64OrNull, toDateOrNull, toDateTimeOrNull {#touint8ornull-touint16ornull-touint32ornull-touint64ornull-toint8ornull-toint16ornull-toint32ornull-toint64ornull-tofloat32ornull-tofloat64ornull-todateornull-todatetimeornull}

## toString {#tostring}

@@ -47,10 +261,6 @@ SELECT

See also the `toUnixTimestamp` function.

## toDecimal32(value, S), toDecimal64(value, S), toDecimal128(value, S) {#todecimal32value-s-todecimal64value-s-todecimal128value-s}

Converts `value` to the [Decimal](../../sql-reference/functions/type-conversion-functions.md) data type with precision of `S`. The `value` can be a number or a string. The `S` parameter specifies the number of decimal places.

## toFixedString(s, N) {#tofixedstrings-n}

Converts a String-type argument to a FixedString(N)-type value (a string of fixed length N). N must be a constant.
@@ -78,17 +288,19 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
│ foo\0bar\0 │ foo   │
└────────────┴───────┘

## reinterpretAsUInt8, reinterpretAsUInt16, reinterpretAsUInt32, reinterpretAsUInt64 {#reinterpretasuint8-reinterpretasuint16-reinterpretasuint32-reinterpretasuint64}
## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}

## reinterpretAsInt8, reinterpretAsInt16, reinterpretAsInt32, reinterpretAsInt64 {#reinterpretasint8-reinterpretasint16-reinterpretasint32-reinterpretasint64}
## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}

## reinterpretAsFloat32, reinterpretAsFloat64 {#reinterpretasfloat32-reinterpretasfloat64}
## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264}

## reinterpretAsDate, reinterpretAsDateTime {#reinterpretasdate-reinterpretasdatetime}
## reinterpretAsDate {#reinterpretasdate}

## reinterpretAsDateTime {#reinterpretasdatetime}

These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn't long enough, the functions work as if the string were padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A Date is interpreted as the number of days since the beginning of the Unix epoch, and a DateTime is interpreted as a Unix timestamp.

## reinterpretAsString {#reinterpretasstring}

This function accepts a number, Date, or DateTime and returns a string containing the bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32-type value of 255 is a string that is one byte long.
@@ -96,7 +308,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut

This function accepts a number, Date, or DateTime and returns a FixedString containing the bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32-type value of 255 is a FixedString that is one byte long.

## cast(x, t) {#type_conversion_function-cast}
## CAST(x, T) {#type_conversion_function-cast}

Converts 'x' to the 't' data type. The syntax CAST(x AS t) is also supported.

@@ -133,10 +345,32 @@ SELECT
│ Nullable(UInt16) │
└─────────────────────────────────────────┘
## toInterval {#function-tointerval}
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}

Converts a number-type argument to an Interval type (a time interval).
The Interval type is actually very useful: you can use data of this type directly in arithmetic operations with Date or DateTime. ClickHouse also provides a more convenient syntax for declaring Interval-type data. For example:
Converts a numeric value to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type value.

**Syntax**

``` sql
toIntervalSecond(number)
toIntervalMinute(number)
toIntervalHour(number)
toIntervalDay(number)
toIntervalWeek(number)
toIntervalMonth(number)
toIntervalQuarter(number)
toIntervalYear(number)
```

**Parameters**

- `number` — Duration of the interval; a positive integer.

**Returned value**

- A value of the `Interval` data type.

**Example**

``` sql
WITH
@@ -148,22 +382,257 @@ SELECT
    date + interval_to_week
```

┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│                2019-01-08 │                   2019-01-08 │
└───────────────────────────┴──────────────────────────────┘
``` text
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│                2019-01-08 │                   2019-01-08 │
└───────────────────────────┴──────────────────────────────┘
```
## parsedatetimebestefort {#type_conversion_functions-parsedatetimebesteffort}
|
||||
## parseDateTimeBestEffort {#parsedatetimebesteffort}
|
||||
|
||||
将数字类型参数解析为Date或DateTime类型。
|
||||
与toDate和toDateTime不同,parseDateTimeBestEffort可以进行更复杂的日期格式。
|
||||
有关详细信息,请参阅链接:[复杂日期格式](https://xkcd.com/1179/)。
|
||||
把[String](../../sql-reference/data-types/string.md)类型的时间日期转换为[DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime)数据类型。
|
||||
|
||||
## parsedatetimebestefortornull {#parsedatetimebesteffortornull}
|
||||
该函数可以解析[ISO 8601](https://en.wikipedia.org/wiki/ISO_8601),[RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55)或者ClickHouse的一些别的时间日期格式。
|
||||
|
||||
与[parsedatetimebestefort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回null。
|
||||
**语法**
|
||||
|
||||
## parsedatetimebestefortorzero {#parsedatetimebesteffortorzero}
|
||||
``` sql
|
||||
parseDateTimeBestEffort(time_string [, time_zone]);
|
||||
```
|
||||
|
||||
与[parsedatetimebestefort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回零Date或零DateTime。
|
||||
**Arguments**

- `time_string` — A string containing a date or a date with time to convert.
- `time_zone` — A string with the time zone.

**Supported non-standard formats**

- A string with 9 or 10 digits: a [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and time: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, and so on.
- A string with a date only: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY`, and so on.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` defaults to `2000-01`.
- A string with a date and time together with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, and so on. For example: `2020-12-12 17:36:00 -5:00`.

For all of these formats the function parses month names either by their full name or by the first three letters of the month name, for example: `24/DEC/18`, `24-Dec-18`, `01-September-2018`.

**Returned value**

- A `DateTime` value.
**Example**

Query:

``` sql
SELECT parseDateTimeBestEffort('12/12/2020 12:12:57')
AS parseDateTimeBestEffort;
```

Result:

``` text
┌─parseDateTimeBestEffort─┐
│     2020-12-12 12:12:57 │
└─────────────────────────┘
```

Query:

``` sql
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
AS parseDateTimeBestEffort
```

Result:

``` text
┌─parseDateTimeBestEffort─┐
│     2018-08-18 10:22:16 │
└─────────────────────────┘
```

Query:

``` sql
SELECT parseDateTimeBestEffort('1284101485')
AS parseDateTimeBestEffort
```

Result:

``` text
┌─parseDateTimeBestEffort─┐
│     2015-07-07 12:04:41 │
└─────────────────────────┘
```

Query:

``` sql
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
AS parseDateTimeBestEffort
```

Result:

``` text
┌─parseDateTimeBestEffort─┐
│     2018-12-12 10:12:12 │
└─────────────────────────┘
```

Query:

``` sql
SELECT parseDateTimeBestEffort('10 20:19')
```

Result:

``` text
┌─parseDateTimeBestEffort('10 20:19')─┐
│                 2000-01-10 20:19:00 │
└─────────────────────────────────────┘
```
**See Also**

- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull}

Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns `NULL` when it encounters a date format that cannot be parsed.

## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero}

Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns a zero date or zero date with time when it encounters a date format that cannot be parsed.
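A short illustrative comparison of the two variants on input that cannot be parsed:

``` sql
SELECT
    parseDateTimeBestEffortOrNull('unparseable') AS or_null,
    parseDateTimeBestEffortOrZero('unparseable') AS or_zero
```

Here `or_null` is `NULL`, while `or_zero` is the zero `DateTime` (displayed in the server's time zone).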
## toLowCardinality {#tolowcardinality}

Converts the input value to the [LowCardinality](../data-types/lowcardinality.md) version of the same data type.

To convert data from the `LowCardinality` data type back, use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x AS String)`.
**Syntax**

```sql
toLowCardinality(expr)
```

**Arguments**

- `expr` — An [expression](../syntax.md#syntax-expressions) that results in one of the [supported data types](../data-types/index.md#data_types).

**Returned value**

- The result of `expr`.

Type: `LowCardinality(expr_result_type)`.
**Example**

Query:

```sql
SELECT toLowCardinality('1')
```

Result:

```text
┌─toLowCardinality('1')─┐
│ 1                     │
└───────────────────────┘
```
## toUnixTimestamp64Milli
## toUnixTimestamp64Micro
## toUnixTimestamp64Nano

Converts a `DateTime64` value to an `Int64` value with fixed sub-second precision. The input value is scaled up or down appropriately depending on its precision. Note that the output value is a timestamp in UTC, not in the time zone of the `DateTime64`.

**Syntax**

``` sql
toUnixTimestamp64Milli(value)
```

**Arguments**

- `value` — A `DateTime64` value with any precision.

**Returned value**

- `value` converted to the `Int64` data type.
**Examples**

Query:

``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64)
```

Result:

``` text
┌─toUnixTimestamp64Milli(dt64)─┐
│                1568650812345 │
└──────────────────────────────┘
```

Query:

``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64)
```

Result:

``` text
┌─toUnixTimestamp64Nano(dt64)─┐
│         1568650812345678000 │
└─────────────────────────────┘
```
## fromUnixTimestamp64Milli
## fromUnixTimestamp64Micro
## fromUnixTimestamp64Nano

Converts an `Int64` value to a `DateTime64` value with fixed sub-second precision and an optional time zone. The input value is scaled up or down appropriately depending on its precision. Note that the input value is treated as a UTC timestamp, not a timestamp in the given time zone.

**Syntax**

``` sql
fromUnixTimestamp64Milli(value [, timezone])
```

**Arguments**

- `value` — An `Int64` value with any precision.
- `timezone` — A `String` with the time zone name (optional).

**Returned value**

- `value` converted to the `DateTime64` data type.
**Example**

``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC')
```

``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
│              2009-02-13 23:31:31.011 │
└──────────────────────────────────────┘
```

[Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) <!--hide-->
|
@ -75,6 +75,7 @@
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <common/argsToConfig.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include <Common/UTF8Helpers.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config_version.h>
|
||||
@ -357,6 +358,78 @@ private:
|
||||
return false;
|
||||
}
|
||||
|
||||
#if USE_REPLXX
|
||||
static void highlight(const String & query, std::vector<replxx::Replxx::Color> & colors)
|
||||
{
|
||||
using namespace replxx;
|
||||
|
||||
static const std::unordered_map<TokenType, Replxx::Color> token_to_color =
|
||||
{
|
||||
{ TokenType::Whitespace, Replxx::Color::DEFAULT },
|
||||
{ TokenType::Comment, Replxx::Color::GRAY },
|
||||
{ TokenType::BareWord, Replxx::Color::DEFAULT },
|
||||
{ TokenType::Number, Replxx::Color::GREEN },
|
||||
{ TokenType::StringLiteral, Replxx::Color::CYAN },
|
||||
{ TokenType::QuotedIdentifier, Replxx::Color::MAGENTA },
|
||||
{ TokenType::OpeningRoundBracket, Replxx::Color::BROWN },
|
||||
{ TokenType::ClosingRoundBracket, Replxx::Color::BROWN },
|
||||
{ TokenType::OpeningSquareBracket, Replxx::Color::BROWN },
|
||||
{ TokenType::ClosingSquareBracket, Replxx::Color::BROWN },
|
||||
{ TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE },
|
||||
{ TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE },
|
||||
|
||||
{ TokenType::Comma, Replxx::Color::INTENSE },
|
||||
{ TokenType::Semicolon, Replxx::Color::INTENSE },
|
||||
{ TokenType::Dot, Replxx::Color::INTENSE },
|
||||
{ TokenType::Asterisk, Replxx::Color::INTENSE },
|
||||
{ TokenType::Plus, Replxx::Color::INTENSE },
|
||||
{ TokenType::Minus, Replxx::Color::INTENSE },
|
||||
{ TokenType::Slash, Replxx::Color::INTENSE },
|
||||
{ TokenType::Percent, Replxx::Color::INTENSE },
|
||||
{ TokenType::Arrow, Replxx::Color::INTENSE },
|
||||
{ TokenType::QuestionMark, Replxx::Color::INTENSE },
|
||||
{ TokenType::Colon, Replxx::Color::INTENSE },
|
||||
{ TokenType::Equals, Replxx::Color::INTENSE },
|
||||
{ TokenType::NotEquals, Replxx::Color::INTENSE },
|
||||
{ TokenType::Less, Replxx::Color::INTENSE },
|
||||
{ TokenType::Greater, Replxx::Color::INTENSE },
|
||||
{ TokenType::LessOrEquals, Replxx::Color::INTENSE },
|
||||
{ TokenType::GreaterOrEquals, Replxx::Color::INTENSE },
|
||||
{ TokenType::Concatenation, Replxx::Color::INTENSE },
|
||||
{ TokenType::At, Replxx::Color::INTENSE },
|
||||
|
||||
{ TokenType::EndOfStream, Replxx::Color::DEFAULT },
|
||||
|
||||
{ TokenType::Error, Replxx::Color::RED },
|
||||
{ TokenType::ErrorMultilineCommentIsNotClosed, Replxx::Color::RED },
|
||||
{ TokenType::ErrorSingleQuoteIsNotClosed, Replxx::Color::RED },
|
||||
{ TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED },
|
||||
{ TokenType::ErrorSinglePipeMark, Replxx::Color::RED },
|
||||
{ TokenType::ErrorWrongNumber, Replxx::Color::RED },
|
||||
{ TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED }
|
||||
};
|
||||
|
||||
const Replxx::Color unknown_token_color = Replxx::Color::RED;
|
||||
|
||||
Lexer lexer(query.data(), query.data() + query.size());
|
||||
size_t pos = 0;
|
||||
|
||||
for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken())
|
||||
{
|
||||
size_t utf8_len = UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(token.begin), token.size());
|
||||
for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index)
|
||||
{
|
||||
if (token_to_color.find(token.type) != token_to_color.end())
|
||||
colors[pos + code_point_index] = token_to_color.at(token.type);
|
||||
else
|
||||
colors[pos + code_point_index] = unknown_token_color;
|
||||
}
|
||||
|
||||
pos += utf8_len;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int mainImpl()
|
||||
{
|
||||
UseSSL use_ssl;
|
||||
@ -502,7 +575,18 @@ private:
|
||||
LineReader::Patterns query_delimiters = {";", "\\G"};
|
||||
|
||||
#if USE_REPLXX
|
||||
ReplxxLineReader lr(Suggest::instance(), history_file, config().has("multiline"), query_extenders, query_delimiters);
|
||||
replxx::Replxx::highlighter_callback_t highlight_callback{};
|
||||
if (config().getBool("highlight"))
|
||||
highlight_callback = highlight;
|
||||
|
||||
ReplxxLineReader lr(
|
||||
Suggest::instance(),
|
||||
history_file,
|
||||
config().has("multiline"),
|
||||
query_extenders,
|
||||
query_delimiters,
|
||||
highlight_callback);
|
||||
|
||||
#elif defined(USE_READLINE) && USE_READLINE
|
||||
ReadlineLineReader lr(Suggest::instance(), history_file, config().has("multiline"), query_extenders, query_delimiters);
|
||||
#else
|
||||
@ -1766,6 +1850,7 @@ public:
|
||||
("echo", "in batch mode, print query before execution")
|
||||
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
|
||||
("compression", po::value<bool>(), "enable or disable compression")
|
||||
("highlight", po::value<bool>()->default_value(true), "enable or disable basic syntax highlight in interactive command line")
|
||||
("log-level", po::value<std::string>(), "client log level")
|
||||
("server_logs_file", po::value<std::string>(), "put server logs into specified file")
|
||||
;
|
||||
@ -1912,6 +1997,8 @@ public:
|
||||
config().setBool("disable_suggestion", true);
|
||||
if (options.count("suggestion_limit"))
|
||||
config().setInt("suggestion_limit", options["suggestion_limit"].as<int>());
|
||||
if (options.count("highlight"))
|
||||
config().setBool("highlight", options["highlight"].as<bool>());
|
||||
|
||||
argsToConfig(common_arguments, config(), 100);
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <common/phdr_cache.h>
|
||||
#include <common/ErrorHandlers.h>
|
||||
#include <common/getMemoryAmount.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <common/coverage.h>
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
@ -125,6 +126,7 @@ namespace ErrorCodes
|
||||
extern const int FAILED_TO_GETPWUID;
|
||||
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
|
||||
extern const int NETWORK_ERROR;
|
||||
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
|
||||
}
|
||||
|
||||
|
||||
@ -210,6 +212,52 @@ void Server::defineOptions(Poco::Util::OptionSet & options)
|
||||
BaseDaemon::defineOptions(options);
|
||||
}
|
||||
|
||||
|
||||
/// Check that there is no user-level settings at the top level in config.
|
||||
/// This is a common source of mistakes (users don't know where to write user-level settings).
|
||||
void checkForUserSettingsAtTopLevel(const Poco::Util::AbstractConfiguration & config, const std::string & path)
|
||||
{
|
||||
if (config.getBool("skip_check_for_incorrect_settings", false))
|
||||
return;
|
||||
|
||||
Settings settings;
|
||||
for (const auto & setting : settings)
|
||||
{
|
||||
std::string name = setting.getName().toString();
|
||||
if (config.has(name))
|
||||
{
|
||||
throw Exception(fmt::format("A setting '{}' appeared at top level in config {}."
|
||||
" But it is user-level setting that should be located in users.xml inside <profiles> section for specific profile."
|
||||
" You can add it to <profiles><default> if you want to change default value of this setting."
|
||||
" You can also disable the check - specify <skip_check_for_incorrect_settings>1</skip_check_for_incorrect_settings>"
|
||||
" in the main configuration file.",
|
||||
name, path),
|
||||
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void checkForUsersNotInMainConfig(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_path,
|
||||
const std::string & users_config_path,
|
||||
Poco::Logger * log)
|
||||
{
|
||||
if (config.getBool("skip_check_for_incorrect_settings", false))
|
||||
return;
|
||||
|
||||
if (config.has("users") || config.has("profiles") || config.has("quotas"))
|
||||
{
|
||||
/// We cannot throw exception here, because we have support for obsolete 'conf.d' directory
|
||||
/// (that does not correspond to config.d or users.d) but substitute configuration to both of them.
|
||||
|
||||
LOG_ERROR(log, "The <users>, <profiles> and <quotas> elements should be located in users config file: {} not in main config {}."
|
||||
" Also note that you should place configuration changes to the appropriate *.d directory like 'users.d'.",
|
||||
users_config_path, config_path);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int Server::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
Poco::Logger * log = &logger();
|
||||
@ -269,6 +317,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
}
|
||||
|
||||
checkForUserSettingsAtTopLevel(config(), config_path);
|
||||
|
||||
const auto memory_amount = getMemoryAmount();
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
@ -473,13 +523,16 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
|
||||
}
|
||||
|
||||
auto main_config_reloader = std::make_unique<ConfigReloader>(config_path,
|
||||
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
||||
config_path,
|
||||
include_from_path,
|
||||
config().getString("path", ""),
|
||||
std::move(main_config_zk_node_cache),
|
||||
main_config_zk_changed_event,
|
||||
[&](ConfigurationPtr config)
|
||||
{
|
||||
checkForUserSettingsAtTopLevel(*config, config_path);
|
||||
|
||||
// FIXME logging-related things need synchronization -- see the 'Logger * log' saved
|
||||
// in a lot of places. For now, disable updating log configuration without server restart.
|
||||
//setTextLog(global_context->getTextLog());
|
||||
@ -508,12 +561,21 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
if (Poco::File(config_dir + users_config_path).exists())
|
||||
users_config_path = config_dir + users_config_path;
|
||||
}
|
||||
auto users_config_reloader = std::make_unique<ConfigReloader>(users_config_path,
|
||||
|
||||
if (users_config_path != config_path)
|
||||
checkForUsersNotInMainConfig(config(), config_path, users_config_path, log);
|
||||
|
||||
auto users_config_reloader = std::make_unique<ConfigReloader>(
|
||||
users_config_path,
|
||||
include_from_path,
|
||||
config().getString("path", ""),
|
||||
zkutil::ZooKeeperNodeCache([&] { return global_context->getZooKeeper(); }),
|
||||
std::make_shared<Poco::Event>(),
|
||||
[&](ConfigurationPtr config) { global_context->setUsersConfig(config); },
|
||||
[&](ConfigurationPtr config)
|
||||
{
|
||||
global_context->setUsersConfig(config);
|
||||
checkForUserSettingsAtTopLevel(*config, users_config_path);
|
||||
},
|
||||
/* already_loaded = */ false);
|
||||
|
||||
/// Reload config in SYSTEM RELOAD CONFIG query.
|
||||
|
@ -1,6 +1,9 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
NOTE: User and query level settings are set up in "users.xml" file.
|
||||
If you have accidentally specified user-level settings here, the server won't start.
|
||||
You can either move the settings to the right place inside "users.xml" file
|
||||
or add <skip_check_for_incorrect_settings>1</skip_check_for_incorrect_settings> here.
|
||||
-->
|
||||
<yandex>
|
||||
<logger>
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <ext/scope_guard.h>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <ifaddrs.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -44,66 +45,22 @@ namespace
|
||||
return IPSubnet(toIPv6(subnet.getPrefix()), subnet.getMask());
|
||||
}
|
||||
|
||||
|
||||
/// Helper function for isAddressOfHost().
|
||||
bool isAddressOfHostImpl(const IPAddress & address, const String & host)
|
||||
{
|
||||
IPAddress addr_v6 = toIPv6(address);
|
||||
|
||||
/// Resolve by hand, because Poco don't use AI_ALL flag but we need it.
|
||||
addrinfo * ai_begin = nullptr;
|
||||
SCOPE_EXIT(
|
||||
{
|
||||
if (ai_begin)
|
||||
freeaddrinfo(ai_begin);
|
||||
});
|
||||
|
||||
addrinfo hints;
|
||||
memset(&hints, 0, sizeof(hints));
|
||||
hints.ai_family = AF_UNSPEC;
|
||||
hints.ai_flags |= AI_V4MAPPED | AI_ALL;
|
||||
|
||||
int err = getaddrinfo(host.c_str(), nullptr, &hints, &ai_begin);
|
||||
if (err)
|
||||
throw Exception("Cannot getaddrinfo(" + host + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR);
|
||||
|
||||
for (const addrinfo * ai = ai_begin; ai; ai = ai->ai_next)
|
||||
{
|
||||
if (ai->ai_addrlen && ai->ai_addr)
|
||||
{
|
||||
if (ai->ai_family == AF_INET)
|
||||
{
|
||||
const auto & sin = *reinterpret_cast<const sockaddr_in *>(ai->ai_addr);
|
||||
if (addr_v6 == toIPv6(IPAddress(&sin.sin_addr, sizeof(sin.sin_addr))))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else if (ai->ai_family == AF_INET6)
|
||||
{
|
||||
const auto & sin = *reinterpret_cast<const sockaddr_in6*>(ai->ai_addr);
|
||||
if (addr_v6 == IPAddress(&sin.sin6_addr, sizeof(sin.sin6_addr), sin.sin6_scope_id))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
auto & getIsAddressOfHostCache()
|
||||
{
|
||||
static SimpleCache<decltype(isAddressOfHostImpl), isAddressOfHostImpl> cache;
|
||||
return cache;
|
||||
}
|
||||
|
||||
/// Whether a specified address is one of the addresses of a specified host.
|
||||
bool isAddressOfHost(const IPAddress & address, const String & host)
|
||||
{
|
||||
/// We need to cache DNS requests.
|
||||
return getIsAddressOfHostCache()(address, host);
|
||||
IPAddress addr_v6 = toIPv6(address);
|
||||
|
||||
auto host_addresses = DNSResolver::instance().resolveHostAll(host);
|
||||
|
||||
for (const auto & addr : host_addresses)
|
||||
{
|
||||
if (addr.family() == IPAddress::Family::IPv4 && addr_v6 == toIPv6(addr))
|
||||
return true;
|
||||
else if (addr.family() == IPAddress::Family::IPv6 && addr_v6 == addr)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Helper function for isAddressOfLocalhost().
|
||||
@ -147,16 +104,10 @@ namespace
|
||||
return boost::range::find(local_addresses, toIPv6(address)) != local_addresses.end();
|
||||
}
|
||||
|
||||
/// Helper function for getHostByAddress().
|
||||
String getHostByAddressImpl(const IPAddress & address)
|
||||
/// Returns the host name by its address.
|
||||
String getHostByAddress(const IPAddress & address)
|
||||
{
|
||||
Poco::Net::SocketAddress sock_addr(address, 0);
|
||||
|
||||
/// Resolve by hand, because Poco library doesn't have such functionality.
|
||||
char host[1024];
|
||||
int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD);
|
||||
if (err)
|
||||
throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR);
|
||||
String host = DNSResolver::instance().reverseResolve(address);
|
||||
|
||||
/// Check that PTR record is resolved back to client address
|
||||
if (!isAddressOfHost(address, host))
|
||||
@ -165,19 +116,6 @@ namespace
|
||||
return host;
|
||||
}
|
||||
|
||||
auto & getHostByAddressCache()
|
||||
{
|
||||
static SimpleCache<decltype(getHostByAddressImpl), &getHostByAddressImpl> cache;
|
||||
return cache;
|
||||
}
|
||||
|
||||
/// Returns the host name by its address.
|
||||
String getHostByAddress(const IPAddress & address)
|
||||
{
|
||||
/// We need to cache DNS requests.
|
||||
return getHostByAddressCache()(address);
|
||||
}
|
||||
|
||||
|
||||
void parseLikePatternIfIPSubnet(const String & pattern, IPSubnet & subnet, IPAddress::Family address_family)
|
||||
{
|
||||
@ -376,10 +314,4 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const
|
||||
return false;
|
||||
}
|
||||
|
||||
void AllowedClientHosts::dropDNSCaches()
|
||||
{
|
||||
getIsAddressOfHostCache().drop();
|
||||
getHostByAddressCache().drop();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -114,8 +114,6 @@ public:
|
||||
friend bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs);
|
||||
friend bool operator !=(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) { return !(lhs == rhs); }
|
||||
|
||||
static void dropDNSCaches();
|
||||
|
||||
private:
|
||||
std::vector<IPAddress> addresses;
|
||||
std::vector<IPSubnet> subnets;
|
||||
|
@ -150,6 +150,8 @@ public:
|
||||
virtual void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0;
|
||||
|
||||
virtual void addBatchSinglePlaceFromInterval(size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
|
||||
|
||||
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
|
||||
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
|
||||
* -Array combinator. It might also be used generally to break data dependency when array
|
||||
@ -214,6 +216,12 @@ public:
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceFromInterval(size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
|
||||
{
|
||||
for (size_t i = batch_begin; i < batch_end; ++i)
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
|
||||
void addBatchArray(
|
||||
size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena)
|
||||
const override
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <IO/WriteBufferFromArena.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/AlignedBuffer.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -27,6 +28,51 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
static std::string getTypeString(const AggregateFunctionPtr & func)
|
||||
{
|
||||
WriteBufferFromOwnString stream;
|
||||
stream << "AggregateFunction(" << func->getName();
|
||||
const auto & parameters = func->getParameters();
|
||||
const auto & argument_types = func->getArgumentTypes();
|
||||
|
||||
if (!parameters.empty())
|
||||
{
|
||||
stream << '(';
|
||||
for (size_t i = 0; i < parameters.size(); ++i)
|
||||
{
|
||||
if (i)
|
||||
stream << ", ";
|
||||
stream << applyVisitor(FieldVisitorToString(), parameters[i]);
|
||||
}
|
||||
stream << ')';
|
||||
}
|
||||
|
||||
for (const auto & argument_type : argument_types)
|
||||
stream << ", " << argument_type->getName();
|
||||
|
||||
stream << ')';
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
|
||||
ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_)
|
||||
: func(func_), type_string(getTypeString(func))
|
||||
{
|
||||
}
|
||||
|
||||
ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_, const ConstArenas & arenas_)
|
||||
: foreign_arenas(arenas_), func(func_), type_string(getTypeString(func))
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
|
||||
{
|
||||
func = func_;
|
||||
type_string = getTypeString(func);
|
||||
}
|
||||
|
||||
|
||||
ColumnAggregateFunction::~ColumnAggregateFunction()
|
||||
{
|
||||
if (!func->hasTrivialDestructor() && !src)
|
||||
@ -336,15 +382,10 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const
|
||||
return create(func);
|
||||
}
|
||||
|
||||
String ColumnAggregateFunction::getTypeString() const
|
||||
{
|
||||
return DataTypeAggregateFunction(func, func->getArgumentTypes(), func->getParameters()).getName();
|
||||
}
|
||||
|
||||
Field ColumnAggregateFunction::operator[](size_t n) const
|
||||
{
|
||||
Field field = AggregateFunctionStateData();
|
||||
field.get<AggregateFunctionStateData &>().name = getTypeString();
|
||||
field.get<AggregateFunctionStateData &>().name = type_string;
|
||||
{
|
||||
WriteBufferFromString buffer(field.get<AggregateFunctionStateData &>().data);
|
||||
func->serialize(data[n], buffer);
|
||||
@ -355,7 +396,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const
|
||||
void ColumnAggregateFunction::get(size_t n, Field & res) const
|
||||
{
|
||||
res = AggregateFunctionStateData();
|
||||
res.get<AggregateFunctionStateData &>().name = getTypeString();
|
||||
res.get<AggregateFunctionStateData &>().name = type_string;
|
||||
{
|
||||
WriteBufferFromString buffer(res.get<AggregateFunctionStateData &>().data);
|
||||
func->serialize(data[n], buffer);
|
||||
@ -425,8 +466,6 @@ static void pushBackAndCreateState(ColumnAggregateFunction::Container & data, Ar
|
||||
|
||||
void ColumnAggregateFunction::insert(const Field & x)
|
||||
{
|
||||
String type_string = getTypeString();
|
||||
|
||||
if (x.getType() != Field::Types::AggregateFunctionState)
|
||||
throw Exception(String("Inserting field of type ") + x.getTypeName() + " into ColumnAggregateFunction. "
|
||||
"Expected " + Field::Types::toString(Field::Types::AggregateFunctionState), ErrorCodes::LOGICAL_ERROR);
|
||||
@ -564,7 +603,7 @@ void ColumnAggregateFunction::getExtremes(Field & min, Field & max) const
|
||||
AggregateDataPtr place = place_buffer.data();
|
||||
|
||||
AggregateFunctionStateData serialized;
|
||||
serialized.name = getTypeString();
|
||||
serialized.name = type_string;
|
||||
|
||||
func->create(place);
|
||||
try
|
||||
|
@ -74,6 +74,9 @@ private:
|
||||
/// Array of pointers to aggregation states, that are placed in arenas.
|
||||
Container data;
|
||||
|
||||
/// Name of the type to distinguish different aggregation states.
|
||||
String type_string;
|
||||
|
||||
ColumnAggregateFunction() {}
|
||||
|
||||
/// Create a new column that has another column as a source.
|
||||
@ -84,29 +87,17 @@ private:
|
||||
/// but ownership of different elements cannot be mixed by different columns.
|
||||
void ensureOwnership();
|
||||
|
||||
ColumnAggregateFunction(const AggregateFunctionPtr & func_)
|
||||
: func(func_)
|
||||
{
|
||||
}
|
||||
ColumnAggregateFunction(const AggregateFunctionPtr & func_);
|
||||
|
||||
ColumnAggregateFunction(const AggregateFunctionPtr & func_,
|
||||
const ConstArenas & arenas_)
|
||||
: foreign_arenas(arenas_), func(func_)
|
||||
{
|
||||
}
|
||||
|
||||
const ConstArenas & arenas_);
|
||||
|
||||
ColumnAggregateFunction(const ColumnAggregateFunction & src_);
|
||||
|
||||
String getTypeString() const;
|
||||
|
||||
public:
|
||||
~ColumnAggregateFunction() override;
|
||||
|
||||
void set(const AggregateFunctionPtr & func_)
|
||||
{
|
||||
func = func_;
|
||||
}
|
||||
void set(const AggregateFunctionPtr & func_);
|
||||
|
||||
AggregateFunctionPtr getAggregateFunction() { return func; }
|
||||
AggregateFunctionPtr getAggregateFunction() const { return func; }
|
||||
|
@ -150,7 +150,7 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Get peice of memory with alignment
|
||||
/// Get piece of memory with alignment
|
||||
char * alignedAlloc(size_t size, size_t alignment)
|
||||
{
|
||||
do
|
||||
|
@ -4,20 +4,32 @@
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Poco/Net/IPAddress.h>
|
||||
#include <Poco/Net/DNS.h>
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/NumberParser.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern Event DNSError;
|
||||
}
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<> struct hash<Poco::Net::IPAddress>
|
||||
{
|
||||
size_t operator()(const Poco::Net::IPAddress & address) const noexcept
|
||||
{
|
||||
std::string_view addr(static_cast<const char *>(address.addr()), address.length());
|
||||
std::hash<std::string_view> hash_impl;
|
||||
return hash_impl(addr);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -25,6 +37,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int DNS_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -76,16 +89,48 @@ static void splitHostAndPort(const std::string & host_and_port, std::string & ou
|
||||
}
|
||||
}
|
||||
|
||||
static Poco::Net::IPAddress resolveIPAddressImpl(const std::string & host)
|
||||
static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
|
||||
{
|
||||
Poco::Net::IPAddress ip;
|
||||
|
||||
/// NOTE: Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2
|
||||
/// Therefore we use SocketAddress constructor with dummy port to resolve IP
|
||||
return Poco::Net::SocketAddress(host, 0U).host();
|
||||
if (Poco::Net::IPAddress::tryParse(host, ip))
|
||||
return DNSResolver::IPAddresses(1, ip);
|
||||
|
||||
/// Family: AF_UNSPEC
|
||||
/// AI_ALL is required for checking if client is allowed to connect from an address
|
||||
auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL;
|
||||
/// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured.
|
||||
/// It should not affect client address checking, since client cannot connect from IPv6 address
|
||||
/// if server has no IPv6 addresses.
|
||||
flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG;
|
||||
#if defined(ARCADIA_BUILD)
|
||||
auto addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses();
|
||||
#else
|
||||
auto addresses = Poco::Net::DNS::hostByName(host, flags).addresses();
|
||||
#endif
|
||||
if (addresses.empty())
|
||||
throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR);
|
||||
|
||||
return addresses;
|
||||
}
|
||||
|
||||
static String reverseResolveImpl(const Poco::Net::IPAddress & address)
|
||||
{
|
||||
Poco::Net::SocketAddress sock_addr(address, 0);
|
||||
|
||||
/// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...)
|
||||
char host[1024];
|
||||
int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD);
|
||||
if (err)
|
||||
throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR);
|
||||
return host;
|
||||
}
|
||||
|
||||
struct DNSResolver::Impl
|
||||
{
|
||||
SimpleCache<decltype(resolveIPAddressImpl), &resolveIPAddressImpl> cache_host;
|
||||
SimpleCache<decltype(reverseResolveImpl), &reverseResolveImpl> cache_address;
|
||||
|
||||
std::mutex drop_mutex;
|
||||
std::mutex update_mutex;
|
||||
@ -95,18 +140,25 @@ struct DNSResolver::Impl
|
||||
|
||||
/// Stores hosts that were requested to be resolved since the last update of the DNS cache.
|
||||
NameSet new_hosts;
|
||||
std::unordered_set<Poco::Net::IPAddress> new_addresses;
|
||||
|
||||
/// Stores all hosts that were ever requested to be resolved
|
||||
NameSet known_hosts;
|
||||
std::unordered_set<Poco::Net::IPAddress> known_addresses;
|
||||
|
||||
/// If disabled, will not make cache lookups, will resolve addresses manually on each call
|
||||
std::atomic<bool> disable_cache{false};
|
||||
};
|
||||
|
||||
|
||||
DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()) {}
|
||||
DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()), log(&Poco::Logger::get("DNSResolver")) {}
|
||||
|
||||
Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host)
|
||||
{
|
||||
return resolveHostAll(host).front();
|
||||
}
|
||||
|
||||
DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host)
|
||||
{
|
||||
if (impl->disable_cache)
|
||||
return resolveIPAddressImpl(host);
|
||||
@ -125,7 +177,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an
|
||||
splitHostAndPort(host_and_port, host, port);
|
||||
|
||||
addToNewHosts(host);
|
||||
return Poco::Net::SocketAddress(impl->cache_host(host), port);
|
||||
return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
|
||||
}
|
||||
|
||||
Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port)
|
||||
@ -134,17 +186,29 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U
|
||||
return Poco::Net::SocketAddress(host, port);
|
||||
|
||||
addToNewHosts(host);
|
||||
return Poco::Net::SocketAddress(impl->cache_host(host), port);
|
||||
return Poco::Net::SocketAddress(impl->cache_host(host).front(), port);
|
||||
}
|
||||
|
||||
String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address)
|
||||
{
|
||||
if (impl->disable_cache)
|
||||
return reverseResolveImpl(address);
|
||||
|
||||
addToNewAddresses(address);
|
||||
return impl->cache_address(address);
|
||||
}
|
||||
|
||||
void DNSResolver::dropCache()
|
||||
{
|
||||
impl->cache_host.drop();
|
||||
impl->cache_address.drop();
|
||||
|
||||
std::scoped_lock lock(impl->update_mutex, impl->drop_mutex);
|
||||
|
||||
impl->known_hosts.clear();
|
||||
impl->known_addresses.clear();
|
||||
impl->new_hosts.clear();
|
||||
impl->new_addresses.clear();
|
||||
impl->host_name.reset();
|
||||
}
|
||||
|
||||
@ -166,34 +230,27 @@ String DNSResolver::getHostName()
|
||||
return *impl->host_name;
|
||||
}
|
||||
|
||||
bool DNSResolver::updateCache()
|
||||
static const String & cacheElemToString(const String & str) { return str; }
|
||||
static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr.toString(); }
|
||||
|
||||
template<typename UpdateF, typename ElemsT>
|
||||
bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(impl->drop_mutex);
|
||||
for (const auto & host : impl->new_hosts)
|
||||
impl->known_hosts.insert(host);
|
||||
impl->new_hosts.clear();
|
||||
|
||||
impl->host_name.emplace(Poco::Net::DNS::hostName());
|
||||
}
|
||||
|
||||
std::lock_guard lock(impl->update_mutex);
|
||||
|
||||
bool updated = false;
|
||||
String lost_hosts;
|
||||
for (const auto & host : impl->known_hosts)
|
||||
String lost_elems;
|
||||
for (const auto & elem : elems)
|
||||
{
|
||||
try
|
||||
{
|
||||
updated |= updateHost(host);
|
||||
updated |= (this->*update_func)(elem);
|
||||
}
|
||||
catch (const Poco::Net::NetException &)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::DNSError);
|
||||
|
||||
if (!lost_hosts.empty())
|
||||
lost_hosts += ", ";
|
||||
lost_hosts += host;
|
||||
if (!lost_elems.empty())
|
||||
lost_elems += ", ";
|
||||
lost_elems += cacheElemToString(elem);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -201,12 +258,41 @@ bool DNSResolver::updateCache()
|
||||
}
|
||||
}
|
||||
|
||||
if (!lost_hosts.empty())
|
||||
LOG_INFO(&Poco::Logger::get("DNSResolver"), "Cached hosts not found: {}", lost_hosts);
|
||||
if (!lost_elems.empty())
|
||||
LOG_INFO(log, log_msg, lost_elems);
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
bool DNSResolver::updateCache()
|
||||
{
|
||||
LOG_DEBUG(log, "Updating DNS cache");
|
||||
|
||||
{
|
||||
std::lock_guard lock(impl->drop_mutex);
|
||||
|
||||
for (const auto & host : impl->new_hosts)
|
||||
impl->known_hosts.insert(host);
|
||||
impl->new_hosts.clear();
|
||||
|
||||
for (const auto & address : impl->new_addresses)
|
||||
impl->known_addresses.insert(address);
|
||||
impl->new_addresses.clear();
|
||||
|
||||
impl->host_name.emplace(Poco::Net::DNS::hostName());
|
||||
}
|
||||
|
||||
/// FIXME Updating may take a long time because we cannot manage timeouts of getaddrinfo(...) and getnameinfo(...).
|
||||
/// DROP DNS CACHE will wait on update_mutex (possibly while holding drop_mutex)
|
||||
std::lock_guard lock(impl->update_mutex);
|
||||
|
||||
bool hosts_updated = updateCacheImpl(&DNSResolver::updateHost, impl->known_hosts, "Cached hosts not found: {}");
|
||||
updateCacheImpl(&DNSResolver::updateAddress, impl->known_addresses, "Cached addresses not found: {}");
|
||||
|
||||
LOG_DEBUG(log, "Updated DNS cache");
|
||||
return hosts_updated;
|
||||
}
|
||||
|
||||
bool DNSResolver::updateHost(const String & host)
|
||||
{
|
||||
/// Usage of updateHost implies that host is already in cache and there are no extra computations
|
||||
@ -215,12 +301,25 @@ bool DNSResolver::updateHost(const String & host)
|
||||
return old_value != impl->cache_host(host);
|
||||
}
|
||||
|
||||
bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address)
|
||||
{
|
||||
auto old_value = impl->cache_address(address);
|
||||
impl->cache_address.update(address);
|
||||
return old_value != impl->cache_address(address);
|
||||
}
|
||||
|
||||
void DNSResolver::addToNewHosts(const String & host)
|
||||
{
|
||||
std::lock_guard lock(impl->drop_mutex);
|
||||
impl->new_hosts.insert(host);
|
||||
}
|
||||
|
||||
void DNSResolver::addToNewAddresses(const Poco::Net::IPAddress & address)
|
||||
{
|
||||
std::lock_guard lock(impl->drop_mutex);
|
||||
impl->new_addresses.insert(address);
|
||||
}
|
||||
|
||||
DNSResolver::~DNSResolver() = default;
|
||||
|
||||
DNSResolver & DNSResolver::instance()
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Core/Types.h>
|
||||
#include <Core/Names.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -16,18 +17,26 @@ namespace DB
|
||||
class DNSResolver : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
typedef std::vector<Poco::Net::IPAddress> IPAddresses;
|
||||
|
||||
static DNSResolver & instance();
|
||||
|
||||
DNSResolver(const DNSResolver &) = delete;
|
||||
|
||||
/// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolve its IP
|
||||
/// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves its IP
|
||||
Poco::Net::IPAddress resolveHost(const std::string & host);
|
||||
|
||||
/// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolve its IP and port
|
||||
/// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs
|
||||
IPAddresses resolveHostAll(const std::string & host);
|
||||
|
||||
/// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port
|
||||
Poco::Net::SocketAddress resolveAddress(const std::string & host_and_port);
|
||||
|
||||
Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
|
||||
|
||||
/// Accepts host IP and resolves its host name
|
||||
String reverseResolve(const Poco::Net::IPAddress & address);
|
||||
|
||||
/// Get this server host name
|
||||
String getHostName();
|
||||
|
||||
@ -44,16 +53,21 @@ public:
|
||||
~DNSResolver();
|
||||
|
||||
private:
|
||||
template<typename UpdateF, typename ElemsT>
|
||||
bool updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg);
|
||||
|
||||
DNSResolver();
|
||||
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
Poco::Logger * log;
|
||||
|
||||
/// Returns true if IP of host has been changed.
|
||||
/// Updates the cached value and returns true if it has been changed.
|
||||
bool updateHost(const String & host);
|
||||
bool updateAddress(const Poco::Net::IPAddress & address);
|
||||
|
||||
void addToNewHosts(const String & host);
|
||||
void addToNewAddresses(const Poco::Net::IPAddress & address);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <common/demangle.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <filesystem>
|
||||
@ -85,31 +86,6 @@ std::string Exception::getStackTraceString() const
|
||||
}
|
||||
|
||||
|
||||
std::string errnoToString(int code, int the_errno)
|
||||
{
|
||||
const size_t buf_size = 128;
|
||||
char buf[buf_size];
|
||||
#ifndef _GNU_SOURCE
|
||||
int rc = strerror_r(the_errno, buf, buf_size);
|
||||
#ifdef __APPLE__
|
||||
if (rc != 0 && rc != EINVAL)
|
||||
#else
|
||||
if (rc != 0)
|
||||
#endif
|
||||
{
|
||||
std::string tmp = std::to_string(code);
|
||||
const char * code_str = tmp.c_str();
|
||||
const char * unknown_message = "Unknown error ";
|
||||
strcpy(buf, unknown_message);
|
||||
strcpy(buf + strlen(unknown_message), code_str);
|
||||
}
|
||||
return "errno: " + toString(the_errno) + ", strerror: " + std::string(buf);
|
||||
#else
|
||||
(void)code;
|
||||
return "errno: " + toString(the_errno) + ", strerror: " + std::string(strerror_r(the_errno, buf, sizeof(buf)));
|
||||
#endif
|
||||
}
|
||||
|
||||
void throwFromErrno(const std::string & s, int code, int the_errno)
|
||||
{
|
||||
throw ErrnoException(s + ", " + errnoToString(code, the_errno), code, the_errno);
|
||||
|
@ -81,7 +81,6 @@ private:
|
||||
using Exceptions = std::vector<std::exception_ptr>;
|
||||
|
||||
|
||||
std::string errnoToString(int code, int the_errno = errno);
|
||||
[[noreturn]] void throwFromErrno(const std::string & s, int code, int the_errno = errno);
|
||||
/// Useful to produce some extra information about available space and inodes on device
|
||||
[[noreturn]] void throwFromErrnoWithPath(const std::string & s, const std::string & path, int code,
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Common/formatReadable.h>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
#include <common/errnoToString.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
@ -180,6 +180,25 @@
|
||||
M(OSWriteBytes, "Number of bytes written to disks or block devices. Doesn't include bytes that are in page cache dirty pages. May not include data that was written by OS asynchronously.") \
|
||||
M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \
|
||||
M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \
|
||||
\
|
||||
M(PerfCpuCycles, "Total cycles. Be wary of what happens during CPU frequency scaling.") \
|
||||
M(PerfInstructions, "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.") \
|
||||
M(PerfCacheReferences, "Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.") \
|
||||
M(PerfCacheMisses, "Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates.") \
|
||||
M(PerfBranchInstructions, "Retired branch instructions. Prior to Linux 2.6.35, this used the wrong event on AMD processors.") \
|
||||
M(PerfBranchMisses, "Mispredicted branch instructions.") \
|
||||
M(PerfBusCycles, "Bus cycles, which can be different from total cycles.") \
|
||||
M(PerfStalledCyclesFrontend, "Stalled cycles during issue.") \
|
||||
M(PerfStalledCyclesBackend, "Stalled cycles during retirement.") \
|
||||
M(PerfRefCpuCycles, "Total cycles; not affected by CPU frequency scaling.") \
|
||||
\
|
||||
M(PerfCpuClock, "The CPU clock, a high-resolution per-CPU timer") \
|
||||
M(PerfTaskClock, "A clock count specific to the task that is running") \
|
||||
M(PerfContextSwitches, "Number of context switches") \
|
||||
M(PerfCpuMigrations, "Number of times the process has migrated to a new CPU") \
|
||||
M(PerfAlignmentFaults, "Number of alignment faults. These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \
|
||||
M(PerfEmulationFaults, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \
|
||||
\
|
||||
M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
|
||||
\
|
||||
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <common/StringRef.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <common/phdr_cache.h>
|
||||
#include <common/errnoToString.h>
|
||||
|
||||
#include <random>
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <Common/PipeFDs.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <unistd.h>
|
||||
#include <csignal>
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include <Poco/File.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <common/LocalDateTime.h>
|
||||
|
||||
|
@ -4,9 +4,22 @@
|
||||
|
||||
#include "TaskStatsInfoGetter.h"
|
||||
#include "ProcfsMetricsProvider.h"
|
||||
#include "hasLinuxCapability.h"
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <syscall.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <cerrno>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -104,6 +117,404 @@ void TasksStatsCounters::incrementProfileEvents(const ::taskstats & prev, const
|
||||
profile_events.increment(ProfileEvents::OSReadBytes, safeDiff(prev.read_bytes, curr.read_bytes));
|
||||
profile_events.increment(ProfileEvents::OSWriteBytes, safeDiff(prev.write_bytes, curr.write_bytes));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && !defined(ARCADIA_BUILD)
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
thread_local PerfEventsCounters current_thread_counters;
|
||||
|
||||
#define SOFTWARE_EVENT(PERF_NAME, LOCAL_NAME) \
|
||||
PerfEventInfo \
|
||||
{ \
|
||||
.event_type = perf_type_id::PERF_TYPE_SOFTWARE, \
|
||||
.event_config = (PERF_NAME), \
|
||||
.profile_event = ProfileEvents::LOCAL_NAME, \
|
||||
.settings_name = #LOCAL_NAME \
|
||||
}
|
||||
|
||||
#define HARDWARE_EVENT(PERF_NAME, LOCAL_NAME) \
|
||||
PerfEventInfo \
|
||||
{ \
|
||||
.event_type = perf_type_id::PERF_TYPE_HARDWARE, \
|
||||
.event_config = (PERF_NAME), \
|
||||
.profile_event = ProfileEvents::LOCAL_NAME, \
|
||||
.settings_name = #LOCAL_NAME \
|
||||
}
|
||||
|
||||
// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
|
||||
static const PerfEventInfo raw_events_info[] = {
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_INSTRUCTIONS, PerfInstructions),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_CACHE_REFERENCES, PerfCacheReferences),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_CACHE_MISSES, PerfCacheMisses),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PerfBranchInstructions),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_MISSES, PerfBranchMisses),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_BUS_CYCLES, PerfBusCycles),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, PerfStalledCyclesFrontend),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, PerfStalledCyclesBackend),
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_REF_CPU_CYCLES, PerfRefCpuCycles),
|
||||
// `cpu-clock` is a bit broken according to this: https://stackoverflow.com/a/56967896
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_CPU_CLOCK, PerfCpuClock),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_TASK_CLOCK, PerfTaskClock),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_CONTEXT_SWITCHES, PerfContextSwitches),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults)
|
||||
};
|
||||
|
||||
#undef HARDWARE_EVENT
|
||||
#undef SOFTWARE_EVENT
|
||||
|
||||
// A map of event name -> event index, to parse event list in settings.
|
||||
static std::unordered_map<std::string, size_t> populateEventMap()
|
||||
{
|
||||
std::unordered_map<std::string, size_t> name_to_index;
|
||||
name_to_index.reserve(NUMBER_OF_RAW_EVENTS);
|
||||
|
||||
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
||||
{
|
||||
name_to_index.emplace(raw_events_info[i].settings_name, i);
|
||||
}
|
||||
|
||||
return name_to_index;
|
||||
}
|
||||
|
||||
static const auto event_name_to_index = populateEventMap();
|
||||
|
||||
static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, UInt64 flags)
|
||||
{
|
||||
return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags));
|
||||
}
|
||||
|
||||
static int openPerfEventDisabled(Int32 perf_event_paranoid, bool has_cap_sys_admin, UInt32 perf_event_type, UInt64 perf_event_config)
|
||||
{
|
||||
perf_event_attr pe{};
|
||||
pe.type = perf_event_type;
|
||||
pe.size = sizeof(struct perf_event_attr);
|
||||
pe.config = perf_event_config;
|
||||
// disable by default to add as little extra time as possible
|
||||
pe.disabled = 1;
|
||||
// can record kernel only when `perf_event_paranoid` <= 1 or have CAP_SYS_ADMIN
|
||||
pe.exclude_kernel = perf_event_paranoid >= 2 && !has_cap_sys_admin;
|
||||
pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
|
||||
|
||||
return openPerfEvent(&pe, /* measure the calling thread */ 0, /* on any cpu */ -1, -1, 0);
|
||||
}
|
||||
|
||||
static void enablePerfEvent(int event_fd)
|
||||
{
|
||||
if (ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0))
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
|
||||
"Can't enable perf event with file descriptor {}: '{}' ({})",
|
||||
event_fd, strerror(errno), errno);
|
||||
}
|
||||
}
|
||||
|
||||
static void disablePerfEvent(int event_fd)
|
||||
{
|
||||
if (ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0))
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
|
||||
"Can't disable perf event with file descriptor {}: '{}' ({})",
|
||||
event_fd, strerror(errno), errno);
|
||||
}
|
||||
}
|
||||
|
||||
static void releasePerfEvent(int event_fd)
|
||||
{
|
||||
if (close(event_fd))
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
|
||||
"Can't close perf event file descriptor {}: {} ({})",
|
||||
event_fd, strerror(errno), errno);
|
||||
}
|
||||
}
|
||||
|
||||
static bool validatePerfEventDescriptor(int & fd)
|
||||
{
|
||||
if (fcntl(fd, F_GETFL) != -1)
|
||||
return true;
|
||||
|
||||
if (errno == EBADF)
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
|
||||
"Event descriptor {} was closed from the outside; reopening", fd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
|
||||
"Error while checking availability of event descriptor {}: {} ({})",
|
||||
fd, strerror(errno), errno);
|
||||
|
||||
disablePerfEvent(fd);
|
||||
releasePerfEvent(fd);
|
||||
}
|
||||
|
||||
fd = -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_events_list)
|
||||
{
|
||||
const auto valid_event_indices = eventIndicesFromString(needed_events_list);
|
||||
|
||||
// find state changes (if there are any)
|
||||
bool old_state[NUMBER_OF_RAW_EVENTS];
|
||||
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
||||
old_state[i] = thread_events_descriptors_holder.descriptors[i] != -1;
|
||||
|
||||
bool new_state[NUMBER_OF_RAW_EVENTS];
|
||||
std::fill_n(new_state, NUMBER_OF_RAW_EVENTS, false);
|
||||
for (size_t opened_index : valid_event_indices)
|
||||
new_state[opened_index] = true;
|
||||
|
||||
std::vector<size_t> events_to_open;
|
||||
std::vector<size_t> events_to_release;
|
||||
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
||||
{
|
||||
bool old_one = old_state[i];
|
||||
bool new_one = new_state[i];
|
||||
|
||||
if (old_one == new_one)
|
||||
{
|
||||
if (old_one
|
||||
&& !validatePerfEventDescriptor(
|
||||
thread_events_descriptors_holder.descriptors[i]))
|
||||
{
|
||||
events_to_open.push_back(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (new_one)
|
||||
events_to_open.push_back(i);
|
||||
else
|
||||
events_to_release.push_back(i);
|
||||
}
|
||||
|
||||
// release unused descriptors
|
||||
for (size_t i : events_to_release)
|
||||
{
|
||||
int & fd = thread_events_descriptors_holder.descriptors[i];
|
||||
disablePerfEvent(fd);
|
||||
releasePerfEvent(fd);
|
||||
fd = -1;
|
||||
}
|
||||
|
||||
if (events_to_open.empty())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// check permissions
|
||||
// cat /proc/sys/kernel/perf_event_paranoid
|
||||
// -1: Allow use of (almost) all events by all users
|
||||
// >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
|
||||
// >=1: Disallow CPU event access by users without CAP_SYS_ADMIN
|
||||
// >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
|
||||
// >=3: Disallow all event access by users without CAP_SYS_ADMIN
|
||||
Int32 perf_event_paranoid = 0;
|
||||
std::ifstream paranoid_file("/proc/sys/kernel/perf_event_paranoid");
|
||||
paranoid_file >> perf_event_paranoid;
|
||||
|
||||
bool has_cap_sys_admin = hasLinuxCapability(CAP_SYS_ADMIN);
|
||||
if (perf_event_paranoid >= 3 && !has_cap_sys_admin)
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
|
||||
"Not enough permissions to record perf events: "
|
||||
"perf_event_paranoid = {} and CAP_SYS_ADMIN = 0",
|
||||
perf_event_paranoid);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Open descriptors for new events.
|
||||
// Theoretically, we can run out of file descriptors. Threads go up to 10k,
|
||||
// and there might be a dozen perf events per thread, so we're looking at
|
||||
// 100k open files. In practice, this is not likely -- perf events are
|
||||
// mostly used in performance tests or other kinds of testing, and the
|
||||
// number of threads stays below hundred.
|
||||
// We used to check the number of open files by enumerating /proc/self/fd,
|
||||
// but listing all open files before opening more files is obviously
|
||||
// quadratic, and quadraticity never ends well.
|
||||
for (size_t i : events_to_open)
|
||||
{
|
||||
const PerfEventInfo & event_info = raw_events_info[i];
|
||||
int & fd = thread_events_descriptors_holder.descriptors[i];
|
||||
// disable by default to add as little extra time as possible
|
||||
fd = openPerfEventDisabled(perf_event_paranoid, has_cap_sys_admin, event_info.event_type, event_info.event_config);
|
||||
|
||||
if (fd == -1 && errno != ENOENT)
|
||||
{
|
||||
// ENOENT means that the event is not supported. Don't log it, because
|
||||
// this is called for each thread and would be too verbose. Log other
|
||||
// error codes because they might signify an error.
|
||||
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
|
||||
"Failed to open perf event {} (event_type={}, event_config={}): "
|
||||
"'{}' ({})", event_info.settings_name, event_info.event_type,
|
||||
event_info.event_config, strerror(errno), errno);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parse comma-separated list of event names. Empty means all available
// events.
std::vector<size_t> PerfEventsCounters::eventIndicesFromString(const std::string & events_list)
{
    std::vector<size_t> result;
    result.reserve(NUMBER_OF_RAW_EVENTS);

    if (events_list.empty())
    {
        for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
        {
            result.push_back(i);
        }
        return result;
    }

    std::istringstream iss(events_list);
    std::string event_name;
    while (std::getline(iss, event_name, ','))
    {
        // Allow spaces at the beginning of the token, so that you can write
        // 'a, b'.
        event_name.erase(0, event_name.find_first_not_of(' '));

        auto entry = event_name_to_index.find(event_name);
        if (entry != event_name_to_index.end())
        {
            result.push_back(entry->second);
        }
        else
        {
            LOG_ERROR(&Poco::Logger::get("PerfEvents"),
                "Unknown perf event name '{}' specified in settings", event_name);
        }
    }

    return result;
}

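// (Sketch, not part of this diff: the tokenization above is just getline on
// ',' plus a leading-space trim; a self-contained restatement for reference.)
//
//     #include <sstream>
//     #include <string>
//     #include <vector>
//
//     static std::vector<std::string> splitEventNames(const std::string & list)
//     {
//         std::vector<std::string> names; // "a, b" -> {"a", "b"}
//         std::istringstream iss(list);
//         std::string name;
//         while (std::getline(iss, name, ','))
//         {
//             name.erase(0, name.find_first_not_of(' '));
//             names.push_back(name);
//         }
//         return names;
//     }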
void PerfEventsCounters::initializeProfileEvents(const std::string & events_list)
{
    if (!processThreadLocalChanges(events_list))
        return;

    for (int fd : thread_events_descriptors_holder.descriptors)
    {
        if (fd == -1)
            continue;

        // We don't reset the event, because the time_running and time_enabled
        // can't be reset anyway and we have to calculate deltas.
        enablePerfEvent(fd);
    }
}

void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile_events)
{
    // Disable all perf events.
    for (auto fd : thread_events_descriptors_holder.descriptors)
    {
        if (fd == -1)
            continue;
        disablePerfEvent(fd);
    }

    // Read the counter values.
    PerfEventValue current_values[NUMBER_OF_RAW_EVENTS];
    for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
    {
        int fd = thread_events_descriptors_holder.descriptors[i];
        if (fd == -1)
            continue;

        constexpr ssize_t bytes_to_read = sizeof(current_values[0]);
        const int bytes_read = read(fd, &current_values[i], bytes_to_read);

        if (bytes_read != bytes_to_read)
        {
            LOG_WARNING(&Poco::Logger::get("PerfEvents"),
                "Can't read event value from file descriptor {}: '{}' ({})",
                fd, strerror(errno), errno);
            current_values[i] = {};
        }
    }

    // actually process counters' values
    for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
    {
        int fd = thread_events_descriptors_holder.descriptors[i];
        if (fd == -1)
            continue;

        const PerfEventInfo & info = raw_events_info[i];
        const PerfEventValue & previous_value = previous_values[i];
        const PerfEventValue & current_value = current_values[i];

        // Account for counter multiplexing. time_running and time_enabled are
        // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate
        // deltas from old values.
        const UInt64 delta = (current_value.value - previous_value.value)
            * (current_value.time_enabled - previous_value.time_enabled)
            / std::max(1.f,
                float(current_value.time_running - previous_value.time_running));

        profile_events.increment(info.profile_event, delta);
    }

    // Store current counter values for the next profiling period.
    memcpy(previous_values, current_values, sizeof(current_values));
}

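// (Worked example, not part of this diff, with invented numbers: the delta is
// scaled by time_enabled / time_running, extrapolating the counter over the
// time the event was multiplexed off the PMU.)
//
//     const UInt64 value_delta   = 1000; // counter grew by 1000
//     const UInt64 enabled_delta = 200;  // enabled for 200 ns
//     const UInt64 running_delta = 100;  // but scheduled for only 100 ns
//     const UInt64 scaled = value_delta * enabled_delta
//         / std::max(1.f, float(running_delta)); // == 2000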
void PerfEventsCounters::closeEventDescriptors()
{
    thread_events_descriptors_holder.releaseResources();
}

PerfDescriptorsHolder::PerfDescriptorsHolder()
{
    for (int & descriptor : descriptors)
        descriptor = -1;
}

PerfDescriptorsHolder::~PerfDescriptorsHolder()
{
    releaseResources();
}

void PerfDescriptorsHolder::releaseResources()
{
    for (int & descriptor : descriptors)
    {
        if (descriptor == -1)
            continue;

        disablePerfEvent(descriptor);
        releasePerfEvent(descriptor);
        descriptor = -1;
    }
}

}

#else

namespace DB
{

// Not on Linux or in Arcadia: the functionality is disabled.
PerfEventsCounters current_thread_counters;

}

#endif

@ -5,6 +5,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <pthread.h>
#include <common/logger_useful.h>


#if defined(__linux__)
@ -34,6 +35,24 @@ namespace ProfileEvents
    extern const Event OSWriteChars;
    extern const Event OSReadBytes;
    extern const Event OSWriteBytes;

    extern const Event PerfCpuCycles;
    extern const Event PerfInstructions;
    extern const Event PerfCacheReferences;
    extern const Event PerfCacheMisses;
    extern const Event PerfBranchInstructions;
    extern const Event PerfBranchMisses;
    extern const Event PerfBusCycles;
    extern const Event PerfStalledCyclesFrontend;
    extern const Event PerfStalledCyclesBackend;
    extern const Event PerfRefCpuCycles;

    extern const Event PerfCpuClock;
    extern const Event PerfTaskClock;
    extern const Event PerfContextSwitches;
    extern const Event PerfCpuMigrations;
    extern const Event PerfAlignmentFaults;
    extern const Event PerfEmulationFaults;
#endif
}

@ -116,6 +135,78 @@ struct RUsageCounters
    }
};

// thread_local is disabled in Arcadia, so we have to use a dummy implementation
// there.
#if defined(__linux__) && !defined(ARCADIA_BUILD)

struct PerfEventInfo
{
    // see perf_event.h/perf_type_id enum
    int event_type;
    // see configs in perf_event.h
    int event_config;
    ProfileEvents::Event profile_event;
    std::string settings_name;
};

struct PerfEventValue
{
    UInt64 value = 0;
    UInt64 time_enabled = 0;
    UInt64 time_running = 0;
};

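// (Note, not stated in this diff: per my reading of perf_event_open(2), this
// layout matches the kernel read format when both time-accounting flags are
// requested, which is what lets finalizeProfileEvents fill a whole
// PerfEventValue with a single read() per descriptor.)
//
//     // Assumed event setup producing this layout:
//     // attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED
//     //                  | PERF_FORMAT_TOTAL_TIME_RUNNING;
//     // read(fd, &v, sizeof(PerfEventValue)) -> {value, time_enabled, time_running}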
static constexpr size_t NUMBER_OF_RAW_EVENTS = 16;

struct PerfDescriptorsHolder : boost::noncopyable
{
    int descriptors[NUMBER_OF_RAW_EVENTS]{};

    PerfDescriptorsHolder();

    ~PerfDescriptorsHolder();

    void releaseResources();
};

struct PerfEventsCounters
{
    PerfDescriptorsHolder thread_events_descriptors_holder;

    // time_enabled and time_running can't be reset, so we have to store the
    // data from the previous profiling period and calculate deltas to them,
    // to be able to properly account for counter multiplexing.
    PerfEventValue previous_values[NUMBER_OF_RAW_EVENTS]{};


    void initializeProfileEvents(const std::string & events_list);
    void finalizeProfileEvents(ProfileEvents::Counters & profile_events);
    void closeEventDescriptors();
    bool processThreadLocalChanges(const std::string & needed_events_list);


    static std::vector<size_t> eventIndicesFromString(const std::string & events_list);
};

// Perf event creation is moderately heavy, so we create them once per thread and
// then reuse.
extern thread_local PerfEventsCounters current_thread_counters;

#else

// Not on Linux, or in Arcadia: the functionality is disabled.
struct PerfEventsCounters
{
    void initializeProfileEvents(const std::string & /* events_list */) {}
    void finalizeProfileEvents(ProfileEvents::Counters & /* profile_events */) {}
    void closeEventDescriptors() {}
};

// thread_local is disabled in Arcadia, so we are going to use a static dummy.
extern PerfEventsCounters current_thread_counters;

#endif

#if defined(__linux__)

class TasksStatsCounters
@ -1,6 +1,5 @@
#include <sstream>

#include <Common/CurrentThread.h>
#include <Common/Exception.h>
#include <Common/ThreadProfileEvents.h>
#include <Common/QueryProfiler.h>
@ -57,36 +56,6 @@ ThreadStatus::~ThreadStatus()
    current_thread = nullptr;
}

void ThreadStatus::initPerformanceCounters()
{
    performance_counters_finalized = false;

    /// Clear stats from previous query if a new query is started
    /// TODO: make separate query_thread_performance_counters and thread_performance_counters
    performance_counters.resetCounters();
    memory_tracker.resetCounters();
    memory_tracker.setDescription("(for thread)");

    query_start_time_nanoseconds = getCurrentTimeNanoseconds();
    query_start_time = time(nullptr);
    ++queries_started;

    *last_rusage = RUsageCounters::current(query_start_time_nanoseconds);
    if (!taskstats)
    {
        try
        {
            taskstats = TasksStatsCounters::create(thread_id);
        }
        catch (...)
        {
            tryLogCurrentException(log);
        }
    }
    if (taskstats)
        taskstats->reset();
}

void ThreadStatus::updatePerformanceCounters()
{
    try
@ -33,6 +33,7 @@ class QueryProfilerCpu;
class QueryThreadLog;
class TasksStatsCounters;
struct RUsageCounters;
struct PerfEventsCounters;
class TaskStatsInfoGetter;
class InternalTextLogsQueue;
using InternalTextLogsQueuePtr = std::shared_ptr<InternalTextLogsQueue>;
@ -238,7 +238,6 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
{
    static_assert(is_unsigned_v<ValueType>, "ValueType must be unsigned.");
    using UnsignedDeltaType = ValueType;
    using SignedDeltaType = typename std::make_signed<UnsignedDeltaType>::type;

    const char * source_end = source + source_size;

@ -287,12 +286,13 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
        if (write_spec.data_bits != 0)
        {
            const UInt8 sign = reader.readBit();
            SignedDeltaType signed_dd = static_cast<SignedDeltaType>(reader.readBits(write_spec.data_bits - 1) + 1);
            double_delta = reader.readBits(write_spec.data_bits - 1) + 1;
            if (sign)
            {
                signed_dd *= -1;
                /// It's well defined for unsigned data types.
                /// In contrast, it's undefined to do negation of the most negative signed number due to overflow.
                double_delta = -double_delta;
            }
            double_delta = static_cast<UnsignedDeltaType>(signed_dd);
        }

        const UnsignedDeltaType delta = double_delta + prev_delta;
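// (Worked instance of the new sign/magnitude decoding, not part of this diff,
// numbers invented: the stored magnitude is biased by 1, and negation happens
// on the unsigned type, where wraparound is well defined.)
//
//     // sign = 1, magnitude bits = 0b0011 -> double_delta = -(3 + 1) = -4
//     UInt64 dd = 0b0011 + 1;
//     dd = -dd; // 0xFFFFFFFFFFFFFFFC; adding it to prev_delta wraps mod 2^64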
@ -21,6 +21,8 @@ namespace ErrorCodes

UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const
{
    assert(source != nullptr && dest != nullptr);

    dest[0] = getMethodByte();
    UInt8 header_size = getHeaderSize();
    /// Write data from header_size
@ -33,8 +35,9 @@ UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char

UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const
{
    UInt8 header_size = getHeaderSize();
    assert(source != nullptr && dest != nullptr);

    UInt8 header_size = getHeaderSize();
    if (source_size < header_size)
        throw Exception("Can't decompress data: the compressed data size (" + toString(source_size)
            + ", this should include header size) is less than the header size (" + toString(header_size) + ")", ErrorCodes::CORRUPTED_DATA);
src/Compression/tests/gtest_compressionCodec.cpp (new file, 1332 lines)
File diff suppressed because it is too large
@ -28,6 +28,7 @@ class IColumn;


/** Settings of query execution.
  * These settings go to users.xml.
  */
struct Settings : public SettingsCollection<Settings>
{
@ -45,7 +46,7 @@ struct Settings : public SettingsCollection<Settings>
      * A setting is "IMPORTANT" if it affects the results of queries and can't be ignored by older versions.
      */

#define LIST_OF_SETTINGS(M) \
#define COMMON_SETTINGS(M) \
    M(SettingUInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \
    M(SettingUInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \
    M(SettingUInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \
@ -184,40 +185,10 @@ struct Settings : public SettingsCollection<Settings>
    \
    M(SettingString, count_distinct_implementation, "uniqExact", "What aggregate function to use for implementation of count(DISTINCT ...)", 0) \
    \
    M(SettingBool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
    M(SettingBool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \
    \
    M(SettingBool, add_http_cors_header, false, "Write add http CORS header.", 0) \
    \
    M(SettingUInt64, max_http_get_redirects, 0, "Max number of http GET redirects hops allowed. Make sure additional security measures are in place to prevent a malicious server to redirect your requests to unexpected services.", 0) \
    \
    M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \
    M(SettingBool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \
    M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
    M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \
    M(SettingBool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
    M(SettingBool, input_format_null_as_default, false, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
    \
    M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
    M(SettingBool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
    M(SettingBool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
    M(SettingURI, format_avro_schema_registry_url, {}, "For AvroConfluent format: Confluent Schema Registry URL.", 0) \
    \
    M(SettingBool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \
    \
    M(SettingBool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
    \
    M(SettingBool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
    \
    M(SettingUInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
    M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
    M(SettingUInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
    M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \
    M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \
    M(SettingString, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
    M(SettingUInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
    M(SettingBool, output_format_tsv_crlf_end_of_line, false, "If it is set true, end of line in TSV format will be \\r\\n instead of \\n.", 0) \
    \
    M(SettingBool, use_client_time_zone, false, "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.", 0) \
    \
    M(SettingBool, send_progress_in_http_headers, false, "Send progress notifications using X-ClickHouse-Progress headers. Some clients do not support high amount of HTTP headers (Python requests in particular), so it is disabled by default.", 0) \
@ -226,9 +197,6 @@ struct Settings : public SettingsCollection<Settings>
    \
    M(SettingBool, fsync_metadata, 1, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
    \
    M(SettingUInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \
    M(SettingFloat, input_format_allow_errors_ratio, 0, "Maximum relative amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \
    \
    M(SettingBool, join_use_nulls, 0, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
    \
    M(SettingJoinStrictness, join_default_strictness, JoinStrictness::ALL, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
@ -246,23 +214,6 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingMilliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \
    M(SettingMilliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \
    \
    M(SettingString, format_schema, "", "Schema identifier (used by schema-based formats)", 0) \
    M(SettingString, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \
    M(SettingString, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \
    M(SettingString, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \
    \
    M(SettingString, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_field_delimiter, "\t", "Delimiter between fields (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_row_before_delimiter, "", "Delimiter before field of the first column (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_row_after_delimiter, "\n", "Delimiter after field of the last column (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_row_between_delimiter, "", "Delimiter between rows (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_result_before_delimiter, "", "Prefix before result set (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \
    \
    M(SettingString, format_regexp, "", "Regular expression (for Regexp format)", 0) \
    M(SettingString, format_regexp_escaping_rule, "Escaped", "Field escaping rule (for Regexp format)", 0) \
    M(SettingBool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format)", 0) \
    \
    M(SettingBool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
    M(SettingSeconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
    M(SettingSeconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \
@ -276,6 +227,8 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingUInt64, odbc_max_field_size, 1024, "Max size of field that can be read from ODBC dictionary. Long strings are truncated.", 0) \
    M(SettingUInt64, query_profiler_real_time_period_ns, 1000000000, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
    M(SettingUInt64, query_profiler_cpu_time_period_ns, 1000000000, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
    M(SettingBool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \
    M(SettingString, metrics_perf_events_list, "", "Comma separated list of perf metrics that will be measured throughout queries' execution. Empty means all events. See PerfEventInfo in sources for the available events.", 0) \
    \
    \
    /** Limits during query execution are part of the settings. \
@ -358,13 +311,7 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.", 0) \
    M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.", 0)\
    M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.", 0) \
    M(SettingChar, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \
    M(SettingBool, format_csv_allow_single_quotes, 1, "If it is set to true, allow strings in single quotes.", 0) \
    M(SettingBool, format_csv_allow_double_quotes, 1, "If it is set to true, allow strings in double quotes.", 0) \
    M(SettingBool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
    M(SettingBool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \
    \
    M(SettingDateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
    M(SettingBool, log_profile_events, true, "Log query performance statistics into the query_log and query_thread_log.", 0) \
    M(SettingBool, log_query_settings, true, "Log query settings into the query_log.", 0) \
    M(SettingBool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \
@ -385,6 +332,7 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingBool, enable_debug_queries, false, "Enables debug queries such as AST.", 0) \
    M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.", 0) \
    M(SettingBool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \
    M(SettingBool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \
    M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \
    M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \
    M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql' and 'odbc' table functions.", 0) \
@ -411,7 +359,7 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingBool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
    M(SettingBool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
    M(SettingUInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
    M(SettingBool, optimize_arithmetic_operations_in_agr_func, true, "Removing arithmetic operations from aggregation functions", 0) \
    M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
    M(SettingBool, optimize_if_chain_to_miltiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
    M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
    M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
@ -441,7 +389,70 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
    M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \
    M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
    M(SettingBool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
    \
    M(SettingBool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0)

#define FORMAT_FACTORY_SETTINGS(M) \
    M(SettingChar, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \
    M(SettingBool, format_csv_allow_single_quotes, 1, "If it is set to true, allow strings in single quotes.", 0) \
    M(SettingBool, format_csv_allow_double_quotes, 1, "If it is set to true, allow strings in double quotes.", 0) \
    M(SettingBool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
    M(SettingBool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \
    M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \
    M(SettingBool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \
    M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
    M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \
    M(SettingBool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
    M(SettingBool, input_format_null_as_default, false, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
    \
    M(SettingDateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
    \
    M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
    M(SettingBool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
    M(SettingBool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
    M(SettingURI, format_avro_schema_registry_url, {}, "For AvroConfluent format: Confluent Schema Registry URL.", 0) \
    \
    M(SettingBool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \
    \
    M(SettingBool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
    \
    M(SettingBool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
    \
    M(SettingUInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
    M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
    M(SettingUInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
    M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \
    M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \
    M(SettingString, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
    M(SettingUInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
    M(SettingBool, output_format_tsv_crlf_end_of_line, false, "If it is set true, end of line in TSV format will be \\r\\n instead of \\n.", 0) \
    \
    M(SettingUInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \
    M(SettingFloat, input_format_allow_errors_ratio, 0, "Maximum relative amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \
    \
    M(SettingString, format_schema, "", "Schema identifier (used by schema-based formats)", 0) \
    M(SettingString, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \
    M(SettingString, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \
    M(SettingString, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \
    \
    M(SettingString, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_field_delimiter, "\t", "Delimiter between fields (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_row_before_delimiter, "", "Delimiter before field of the first column (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_row_after_delimiter, "\n", "Delimiter after field of the last column (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_row_between_delimiter, "", "Delimiter between rows (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_result_before_delimiter, "", "Prefix before result set (for CustomSeparated format)", 0) \
    M(SettingString, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \
    \
    M(SettingString, format_regexp, "", "Regular expression (for Regexp format)", 0) \
    M(SettingString, format_regexp_escaping_rule, "Escaped", "Field escaping rule (for Regexp format)", 0) \
    M(SettingBool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format)", 0) \
    \
    M(SettingBool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
    M(SettingBool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0)

#define LIST_OF_SETTINGS(M) \
    COMMON_SETTINGS(M) \
    FORMAT_FACTORY_SETTINGS(M)

DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)

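// (Sketch, not part of this diff: the split keeps LIST_OF_SETTINGS as the
// single expansion point, so existing DECLARE/IMPLEMENT sites need no change.
// A toy X-macro with invented names, to illustrate the composition only.)
//
//     #define TOY_COMMON_SETTINGS(M) M(int, max_threads, 8)
//     #define TOY_FORMAT_SETTINGS(M) M(char, csv_delimiter, ',')
//     #define TOY_LIST_OF_SETTINGS(M) \
//         TOY_COMMON_SETTINGS(M) \
//         TOY_FORMAT_SETTINGS(M)
//
//     #define TOY_DECLARE(TYPE, NAME, DEFAULT) TYPE NAME = DEFAULT;
//     struct ToySettings { TOY_LIST_OF_SETTINGS(TOY_DECLARE) };
//     #undef TOY_DECLARE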
@ -63,7 +63,7 @@ struct SortCursorImpl
        for (auto & column_desc : desc)
        {
            if (!column_desc.column_name.empty())
                throw Exception("SortDesctiption should contain column position if SortCursor was used without header.",
                throw Exception("SortDescription should contain column position if SortCursor was used without header.",
                    ErrorCodes::LOGICAL_ERROR);
        }
        reset(columns, {});
@ -59,6 +59,13 @@ struct SortColumnDescription
    {
        return !(*this == other);
    }

    std::string dump() const
    {
        std::stringstream ss;
        ss << column_name << ":" << column_number << ":dir " << direction << "nulls " << nulls_direction;
        return ss.str();
    }
};

/// Description of the sorting rule for several columns.
@ -1,4 +1,3 @@
#include <DataStreams/ExpressionBlockInputStream.h>
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
#include <Parsers/formatAST.h>
#include <Interpreters/ExpressionActions.h>
@ -1,13 +1,4 @@
set(SRCS)

add_executable (expression_stream expression_stream.cpp ${SRCS})
target_link_libraries (expression_stream PRIVATE dbms clickhouse_storages_system clickhouse_parsers)

add_executable (filter_stream filter_stream.cpp ${SRCS})
target_link_libraries (filter_stream PRIVATE dbms clickhouse_storages_system clickhouse_parsers clickhouse_common_io)

add_executable (union_stream2 union_stream2.cpp ${SRCS})
target_link_libraries (union_stream2 PRIVATE dbms)

add_executable (finish_sorting_stream finish_sorting_stream.cpp ${SRCS})
target_link_libraries (finish_sorting_stream PRIVATE dbms)
@ -1,86 +0,0 @@
#include <iostream>
#include <iomanip>

#include <IO/WriteBufferFromOStream.h>
#include <IO/ReadHelpers.h>

#include <Storages/System/StorageSystemNumbers.h>

#include <DataStreams/LimitBlockInputStream.h>
#include <DataStreams/ExpressionBlockInputStream.h>
#include <Formats/FormatFactory.h>
#include <DataStreams/copyData.h>

#include <DataTypes/DataTypesNumber.h>

#include <Parsers/ParserSelectQuery.h>
#include <Parsers/parseQuery.h>

#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Context.h>
#include <Processors/Executors/TreeExecutorBlockInputStream.h>


int main(int argc, char ** argv)
try
{
    using namespace DB;

    size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10ULL;

    std::string input = "SELECT number, number / 3, number * number";

    ParserSelectQuery parser;
    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);

    SharedContextHolder shared_context = Context::createShared();
    Context context = Context::createGlobal(shared_context.get());
    context.makeGlobalContext();

    NamesAndTypesList source_columns = {{"number", std::make_shared<DataTypeUInt64>()}};
    auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns);
    SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context);
    ExpressionActionsChain chain(context);
    analyzer.appendSelect(chain, false);
    analyzer.appendProjectResult(chain);
    chain.finalize();
    ExpressionActionsPtr expression = chain.getLastActions();

    StoragePtr table = StorageSystemNumbers::create(StorageID("test", "numbers"), false);

    Names column_names;
    column_names.push_back("number");

    QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context);

    BlockInputStreamPtr in;
    in = std::make_shared<TreeExecutorBlockInputStream>(std::move(table->read(column_names, {}, context, stage, 8192, 1)[0]));
    in = std::make_shared<ExpressionBlockInputStream>(in, expression);
    in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10));

    WriteBufferFromOStream out1(std::cout);
    BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out1, expression->getSampleBlock(), context);

    {
        Stopwatch stopwatch;
        stopwatch.start();

        copyData(*in, *out);

        stopwatch.stop();
        std::cout << std::fixed << std::setprecision(2)
            << "Elapsed " << stopwatch.elapsedSeconds() << " sec."
            << ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
            << std::endl;
    }

    return 0;
}
catch (const DB::Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << std::endl;
    throw;
}
@ -1,89 +0,0 @@
#include <iostream>
#include <iomanip>

#include <IO/WriteBufferFromOStream.h>
#include <IO/ReadHelpers.h>

#include <Storages/System/StorageSystemNumbers.h>

#include <DataStreams/LimitBlockInputStream.h>
#include <DataStreams/ExpressionBlockInputStream.h>
#include <DataStreams/FilterBlockInputStream.h>
#include <Formats/FormatFactory.h>
#include <DataStreams/copyData.h>

#include <DataTypes/DataTypesNumber.h>

#include <Parsers/ParserSelectQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>

#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Context.h>
#include <Processors/Executors/TreeExecutorBlockInputStream.h>


int main(int argc, char ** argv)
try
{
    using namespace DB;

    size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10ULL;

    std::string input = "SELECT number, number % 3 == 1";

    ParserSelectQuery parser;
    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);

    formatAST(*ast, std::cerr);
    std::cerr << std::endl;

    SharedContextHolder shared_context = Context::createShared();
    Context context = Context::createGlobal(shared_context.get());
    context.makeGlobalContext();

    NamesAndTypesList source_columns = {{"number", std::make_shared<DataTypeUInt64>()}};
    auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns);
    SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context);
    ExpressionActionsChain chain(context);
    analyzer.appendSelect(chain, false);
    analyzer.appendProjectResult(chain);
    chain.finalize();
    ExpressionActionsPtr expression = chain.getLastActions();

    StoragePtr table = StorageSystemNumbers::create(StorageID("test", "numbers"), false);

    Names column_names;
    column_names.push_back("number");

    QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context);

    BlockInputStreamPtr in = std::make_shared<TreeExecutorBlockInputStream>(std::move(table->read(column_names, {}, context, stage, 8192, 1)[0]));
    in = std::make_shared<FilterBlockInputStream>(in, expression, "equals(modulo(number, 3), 1)");
    in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10));

    WriteBufferFromOStream ob(std::cout);
    BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", ob, expression->getSampleBlock(), context);

    {
        Stopwatch stopwatch;
        stopwatch.start();

        copyData(*in, *out);

        stopwatch.stop();
        std::cout << std::fixed << std::setprecision(2)
            << "Elapsed " << stopwatch.elapsedSeconds() << " sec."
            << ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
            << std::endl;
    }

    return 0;
}
catch (const DB::Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << std::endl;
    throw;
}
@ -1,66 +0,0 @@
#include <iostream>
#include <iomanip>

#include <IO/WriteBufferFromFileDescriptor.h>

#include <Storages/System/StorageSystemNumbers.h>

#include <DataStreams/LimitBlockInputStream.h>
#include <DataStreams/UnionBlockInputStream.h>
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/copyData.h>

#include <DataTypes/DataTypesNumber.h>

#include <Interpreters/Context.h>
#include <Interpreters/loadMetadata.h>
#include <Processors/Executors/TreeExecutorBlockInputStream.h>


using namespace DB;

int main(int, char **)
try
{
    SharedContextHolder shared_context = Context::createShared();
    Context context = Context::createGlobal(shared_context.get());
    context.makeGlobalContext();
    Settings settings = context.getSettings();

    context.setPath("./");

    loadMetadata(context);

    Names column_names;
    column_names.push_back("WatchID");

    StoragePtr table = DatabaseCatalog::instance().getTable({"default", "hits6"}, context);

    QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context);
    auto pipes = table->read(column_names, {}, context, stage, settings.max_block_size, settings.max_threads);

    BlockInputStreams streams(pipes.size());

    for (size_t i = 0, size = streams.size(); i < size; ++i)
        streams[i] = std::make_shared<AsynchronousBlockInputStream>(std::make_shared<TreeExecutorBlockInputStream>(std::move(pipes[i])));

    BlockInputStreamPtr stream = std::make_shared<UnionBlockInputStream>(streams, nullptr, settings.max_threads);
    stream = std::make_shared<LimitBlockInputStream>(stream, 10, 0);

    WriteBufferFromFileDescriptor wb(STDERR_FILENO);
    Block sample = table->getSampleBlock();
    BlockOutputStreamPtr out = context.getOutputFormat("TabSeparated", wb, sample);

    copyData(*stream, *out);

    return 0;
}
catch (const Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << std::endl
        << std::endl
        << "Stack trace:" << std::endl
        << e.getStackTraceString();
    return 1;
}
@ -14,6 +14,8 @@
#include <Formats/ProtobufWriter.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeFactory.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTFunction.h>
@ -36,25 +38,25 @@ namespace ErrorCodes

std::string DataTypeAggregateFunction::doGetName() const
{
    std::stringstream stream;
    WriteBufferFromOwnString stream;
    stream << "AggregateFunction(" << function->getName();

    if (!parameters.empty())
    {
        stream << "(";
        stream << '(';
        for (size_t i = 0; i < parameters.size(); ++i)
        {
            if (i)
                stream << ", ";
            stream << applyVisitor(DB::FieldVisitorToString(), parameters[i]);
        }
        stream << ")";
        stream << ')';
    }

    for (const auto & argument_type : argument_types)
        stream << ", " << argument_type->getName();

    stream << ")";
    stream << ')';
    return stream.str();
}

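// (Example output, my own illustration rather than anything in this diff: an
// aggregate function `quantiles` with parameters (0.5, 0.9) over a UInt64
// argument would render as:
//     AggregateFunction(quantiles(0.5, 0.9), UInt64)
// which is exactly the parenthesized form the loop above assembles.)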
@ -358,6 +358,9 @@ bool PointInPolygonWithGrid<CoordinateType>::contains(CoordinateType x, Coordina
    if (has_empty_bound)
        return false;

    if (std::isnan(x) || std::isnan(y))
        return false;

    CoordinateType float_row = (y + y_shift) * y_scale;
    CoordinateType float_col = (x + x_shift) * x_scale;

@ -7,7 +7,7 @@ namespace DB
{
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int BAD_ARGUMENTS;
}

/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
@ -23,8 +23,10 @@ namespace DB
    template <typename Result = ResultType>
    static inline Result apply(A left, B right)
    {
        // Should be a logical error, but this function is callable from SQL.
        // Need to investigate this.
        if constexpr (!std::is_same_v<A, ResultType> || !std::is_same_v<B, ResultType>)
            throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskAnd.", ErrorCodes::LOGICAL_ERROR);
            throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskAnd.", ErrorCodes::BAD_ARGUMENTS);
        return static_cast<ResultType>(
            ((static_cast<ResultType>(left) & static_cast<ResultType>(right)) & 1)
            | ((((static_cast<ResultType>(left) >> 1) | (static_cast<ResultType>(right) >> 1)) & 1) << 1));
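// (Worked cases, not part of this diff: under the two-bit encoding from the
// comment above, bit 0 = "can be true" and bit 1 = "can be false", so this is
// a three-valued AND.)
//
//     // 0b01 = definitely true, 0b10 = definitely false, 0b11 = unknown.
//     // __bitBoolMaskAnd(0b01, 0b11): bit0 = 1&1 = 1, bit1 = 0|1 = 1 -> 0b11 (unknown)
//     // __bitBoolMaskAnd(0b10, 0b11): bit0 = 0&1 = 0, bit1 = 1|1 = 1 -> 0b10 (false)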
@ -7,7 +7,7 @@ namespace DB
{
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int BAD_ARGUMENTS;
}

/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
@ -24,7 +24,9 @@ namespace DB
    static inline Result apply(A left, B right)
    {
        if constexpr (!std::is_same_v<A, ResultType> || !std::is_same_v<B, ResultType>)
            throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskOr.", ErrorCodes::LOGICAL_ERROR);
        // Should be a logical error, but this function is callable from SQL.
        // Need to investigate this.
            throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskOr.", ErrorCodes::BAD_ARGUMENTS);
        return static_cast<ResultType>(
            ((static_cast<ResultType>(left) | static_cast<ResultType>(right)) & 1)
            | ((((static_cast<ResultType>(left) >> 1) & (static_cast<ResultType>(right) >> 1)) & 1) << 1));
@ -7,6 +7,7 @@ namespace DB
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int BAD_ARGUMENTS;
}

/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
@ -20,7 +21,9 @@ namespace DB
    static inline ResultType NO_SANITIZE_UNDEFINED apply(A a)
    {
        if constexpr (!std::is_same_v<A, ResultType>)
            throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitSwapLastTwo.", ErrorCodes::LOGICAL_ERROR);
        // Should be a logical error, but this function is callable from SQL.
        // Need to investigate this.
            throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitSwapLastTwo.", ErrorCodes::BAD_ARGUMENTS);
        return static_cast<ResultType>(
            ((static_cast<ResultType>(a) & 1) << 1) | ((static_cast<ResultType>(a) >> 1) & 1));
    }
@ -6,7 +6,7 @@ namespace DB
{
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int BAD_ARGUMENTS;
}

/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
@ -20,8 +20,10 @@ namespace DB

    static inline ResultType NO_SANITIZE_UNDEFINED apply(A a)
    {
        // Should be a logical error, but this function is callable from SQL.
        // Need to investigate this.
        if constexpr (!is_integral_v<A>)
            throw DB::Exception("It's a bug! Only integer types are supported by __bitWrapperFunc.", ErrorCodes::LOGICAL_ERROR);
            throw DB::Exception("It's a bug! Only integer types are supported by __bitWrapperFunc.", ErrorCodes::BAD_ARGUMENTS);
        return a == 0 ? static_cast<ResultType>(0b10) : static_cast<ResultType>(0b1);
    }

@ -20,18 +20,42 @@ TEST(S3UriTest, validPatterns)
        ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint);
        ASSERT_EQ("jokserfn", uri.bucket);
        ASSERT_EQ("data", uri.key);
        ASSERT_EQ(true, uri.is_virtual_hosted_style);
    }
    {
        S3::URI uri(Poco::URI("https://storage.yandexcloud.net/jokserfn/data"));
        ASSERT_EQ("https://storage.yandexcloud.net", uri.endpoint);
        ASSERT_EQ("jokserfn", uri.bucket);
        ASSERT_EQ("data", uri.key);
        ASSERT_EQ(false, uri.is_virtual_hosted_style);
    }
    {
        S3::URI uri(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data"));
        ASSERT_EQ("https://s3.us-east-2.amazonaws.com", uri.endpoint);
        ASSERT_EQ("bucketname", uri.bucket);
        ASSERT_EQ("data", uri.key);
        ASSERT_EQ(true, uri.is_virtual_hosted_style);
    }
    {
        S3::URI uri(Poco::URI("https://s3.us-east-2.amazonaws.com/bucketname/data"));
        ASSERT_EQ("https://s3.us-east-2.amazonaws.com", uri.endpoint);
        ASSERT_EQ("bucketname", uri.bucket);
        ASSERT_EQ("data", uri.key);
        ASSERT_EQ(false, uri.is_virtual_hosted_style);
    }
    {
        S3::URI uri(Poco::URI("https://bucketname.s3-us-east-2.amazonaws.com/data"));
        ASSERT_EQ("https://s3-us-east-2.amazonaws.com", uri.endpoint);
        ASSERT_EQ("bucketname", uri.bucket);
        ASSERT_EQ("data", uri.key);
        ASSERT_EQ(true, uri.is_virtual_hosted_style);
    }
    {
        S3::URI uri(Poco::URI("https://s3-us-east-2.amazonaws.com/bucketname/data"));
        ASSERT_EQ("https://s3-us-east-2.amazonaws.com", uri.endpoint);
        ASSERT_EQ("bucketname", uri.bucket);
        ASSERT_EQ("data", uri.key);
        ASSERT_EQ(false, uri.is_virtual_hosted_style);
    }
}

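// (The rule these assertions pin down, as I read the cases above; not stated
// elsewhere in this diff: the bucket comes from the host in virtual-hosted
// style and from the first path segment in path style.)
//
//     // virtual-hosted: https://<bucket>.s3.<region>.amazonaws.com/<key>  -> is_virtual_hosted_style = true
//     // path style:     https://s3.<region>.amazonaws.com/<bucket>/<key>  -> is_virtual_hosted_style = false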
@ -530,63 +530,33 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl(
|
||||
}
|
||||
|
||||
|
||||
bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & result,
|
||||
ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys)
|
||||
void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl(
|
||||
AggregatedDataWithoutKey & res,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateFunctionInstruction * aggregate_instructions,
|
||||
Arena * arena)
|
||||
{
|
||||
UInt64 num_rows = block.rows();
|
||||
return executeOnBlock(block.getColumns(), num_rows, result, key_columns, aggregate_columns, no_more_keys);
|
||||
/// Adding values
|
||||
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
|
||||
{
|
||||
if (inst->offsets)
|
||||
inst->batch_that->addBatchSinglePlaceFromInterval(inst->offsets[row_begin], inst->offsets[row_end - 1], res + inst->state_offset, inst->batch_arguments, arena);
|
||||
else
|
||||
inst->batch_that->addBatchSinglePlaceFromInterval(row_begin, row_end, res + inst->state_offset, inst->batch_arguments, arena);
|
||||
}
|
||||
}
|
||||
bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result,
    ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys)

void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns & aggregate_columns, Columns & materialized_columns,
    AggregateFunctionInstructions & aggregate_functions_instructions, NestedColumnsHolder & nested_columns_holder)
{
    if (isCancelled())
        return true;

    /// `result` will destroy the states of aggregate functions in the destructor
    result.aggregator = this;

    /// How to perform the aggregation?
    if (result.empty())
    {
        result.init(method_chosen);
        result.keys_size = params.keys_size;
        result.key_sizes = key_sizes;
        LOG_TRACE(log, "Aggregation method: {}", result.getMethodName());
    }

    if (isCancelled())
        return true;

    for (size_t i = 0; i < params.aggregates_size; ++i)
        aggregate_columns[i].resize(params.aggregates[i].arguments.size());

    /** Constant columns are not supported directly during aggregation.
      * To make them work anyway, we materialize them.
      */
    Columns materialized_columns;

    /// Remember the columns we will work with
    for (size_t i = 0; i < params.keys_size; ++i)
    {
        materialized_columns.push_back(columns.at(params.keys[i])->convertToFullColumnIfConst());
        key_columns[i] = materialized_columns.back().get();

        if (!result.isLowCardinality())
        {
            auto column_no_lc = recursiveRemoveLowCardinality(key_columns[i]->getPtr());
            if (column_no_lc.get() != key_columns[i])
            {
                materialized_columns.emplace_back(std::move(column_no_lc));
                key_columns[i] = materialized_columns.back().get();
            }
        }
    }

    AggregateFunctionInstructions aggregate_functions_instructions(params.aggregates_size + 1);
    aggregate_functions_instructions.resize(params.aggregates_size + 1);
    aggregate_functions_instructions[params.aggregates_size].that = nullptr;

    std::vector<std::vector<const IColumn *>> nested_columns_holder;
    for (size_t i = 0; i < params.aggregates_size; ++i)
    {
        for (size_t j = 0; j < aggregate_columns[i].size(); ++j)
@ -627,6 +597,62 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData

        aggregate_functions_instructions[i].batch_that = that;
    }
}


bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & result,
    ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys)
{
    UInt64 num_rows = block.rows();
    return executeOnBlock(block.getColumns(), num_rows, result, key_columns, aggregate_columns, no_more_keys);
}


bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result,
    ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys)
{
    if (isCancelled())
        return true;

    /// `result` will destroy the states of aggregate functions in the destructor
    result.aggregator = this;

    /// How to perform the aggregation?
    if (result.empty())
    {
        result.init(method_chosen);
        result.keys_size = params.keys_size;
        result.key_sizes = key_sizes;
        LOG_TRACE(log, "Aggregation method: {}", result.getMethodName());
    }

    if (isCancelled())
        return true;

    /** Constant columns are not supported directly during aggregation.
      * To make them work anyway, we materialize them.
      */
    Columns materialized_columns;

    /// Remember the columns we will work with
    for (size_t i = 0; i < params.keys_size; ++i)
    {
        materialized_columns.push_back(columns.at(params.keys[i])->convertToFullColumnIfConst());
        key_columns[i] = materialized_columns.back().get();

        if (!result.isLowCardinality())
        {
            auto column_no_lc = recursiveRemoveLowCardinality(key_columns[i]->getPtr());
            if (column_no_lc.get() != key_columns[i])
            {
                materialized_columns.emplace_back(std::move(column_no_lc));
                key_columns[i] = materialized_columns.back().get();
            }
        }
    }
    NestedColumnsHolder nested_columns_holder;
    AggregateFunctionInstructions aggregate_functions_instructions;
    prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_functions_instructions, nested_columns_holder);

    if (isCancelled())
        return true;
@ -666,7 +692,8 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
    if (auto * memory_tracker = memory_tracker_child->getParent())
        current_memory_usage = memory_tracker->get();

    auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation; /// Here all the results in the sum are taken into account, from different threads.
    /// Here all the results in the sum are taken into account, from different threads.
    auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;

    bool worth_convert_to_two_level
        = (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold)
@ -972,6 +999,73 @@ void Aggregator::convertToBlockImpl(
    data.clearAndShrink();
}


template <typename Mapped>
inline void Aggregator::insertAggregatesIntoColumns(
    Mapped & mapped,
    MutableColumns & final_aggregate_columns) const
{
    /** Final values of aggregate functions are inserted to columns.
      * Then states of aggregate functions, that are no longer needed, are destroyed.
      *
      * We mark already destroyed states with "nullptr" in data,
      * so they will not be destroyed in destructor of Aggregator
      * (other values will be destroyed in destructor in case of exception).
      *
      * But it becomes tricky, because we have multiple aggregate states pointed by a single pointer in data.
      * So, if exception is thrown in the middle of moving states for different aggregate functions,
      * we have to catch exceptions and destroy all the states that are no longer needed,
      * to keep the data in consistent state.
      *
      * It is also tricky, because there are aggregate functions with "-State" modifier.
      * When we call "insertResultInto" for them, they insert a pointer to the state to ColumnAggregateFunction
      * and ColumnAggregateFunction will take ownership of this state.
      * So, for aggregate functions with "-State" modifier, the state must not be destroyed
      * after it has been transferred to ColumnAggregateFunction.
      * But we should mark that the data no longer owns these states.
      */

    size_t insert_i = 0;
    std::exception_ptr exception;

    try
    {
        /// Insert final values of aggregate functions into columns.
        for (; insert_i < params.aggregates_size; ++insert_i)
            aggregate_functions[insert_i]->insertResultInto(
                mapped + offsets_of_aggregate_states[insert_i],
                *final_aggregate_columns[insert_i]);
    }
    catch (...)
    {
        exception = std::current_exception();
    }

    /** Destroy states that are no longer needed. This loop does not throw.
      *
      * Don't destroy states for "-State" aggregate functions,
      * because the ownership of this state is transferred to ColumnAggregateFunction
      * and ColumnAggregateFunction will take care.
      *
      * But it's only for states that have been transferred to ColumnAggregateFunction
      * before the exception has been thrown.
      */
    for (size_t destroy_i = 0; destroy_i < params.aggregates_size; ++destroy_i)
    {
        /// If ownership was not transferred to ColumnAggregateFunction.
        if (!(destroy_i < insert_i && aggregate_functions[destroy_i]->isState()))
            aggregate_functions[destroy_i]->destroy(
                mapped + offsets_of_aggregate_states[destroy_i]);
    }

    /// Mark the cell as destroyed so it will not be destroyed in destructor.
    mapped = nullptr;

    if (exception)
        std::rethrow_exception(exception);
}

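The insert-then-destroy discipline documented above can be shown in isolation: insert as many final values as possible, remember the first failure, unconditionally release every state in a non-throwing loop, then rethrow. A self-contained sketch with hypothetical stand-in types (it deliberately omits the "-State" ownership-transfer special case):

    #include <cassert>
    #include <cstddef>
    #include <exception>
    #include <stdexcept>
    #include <vector>

    struct State { bool destroyed = false; };

    /// Simulated "insertResultInto" that fails on the second state.
    static void insertResult(State &, size_t i)
    {
        if (i == 1)
            throw std::runtime_error("insertResultInto failed");
    }

    static void finalize(std::vector<State> & states)
    {
        size_t insert_i = 0;
        std::exception_ptr exception;
        try
        {
            for (; insert_i < states.size(); ++insert_i)
                insertResult(states[insert_i], insert_i);
        }
        catch (...)
        {
            exception = std::current_exception();
        }

        /// This loop must not throw: every state is released exactly once,
        /// whether or not its result was inserted.
        for (auto & state : states)
            state.destroyed = true;

        if (exception)
            std::rethrow_exception(exception);
    }

    int main()
    {
        std::vector<State> states(3);
        bool rethrown = false;
        try
        {
            finalize(states);
        }
        catch (const std::runtime_error &)
        {
            rethrown = true;
        }
        /// Despite the mid-loop failure, no state leaked and the error surfaced.
        for (const auto & state : states)
            assert(state.destroyed);
        assert(rethrown);
        return 0;
    }
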

template <typename Method, typename Table>
void NO_INLINE Aggregator::convertToBlockImplFinal(
    Method & method,
@ -984,25 +1078,15 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(
        if (data.hasNullKeyData())
        {
            key_columns[0]->insertDefault();

            for (size_t i = 0; i < params.aggregates_size; ++i)
                aggregate_functions[i]->insertResultInto(
                    data.getNullKeyData() + offsets_of_aggregate_states[i],
                    *final_aggregate_columns[i]);
            insertAggregatesIntoColumns(data.getNullKeyData(), final_aggregate_columns);
        }
    }

    data.forEachValue([&](const auto & key, auto & mapped)
    {
        method.insertKeyIntoColumns(key, key_columns, key_sizes);

        for (size_t i = 0; i < params.aggregates_size; ++i)
            aggregate_functions[i]->insertResultInto(
                mapped + offsets_of_aggregate_states[i],
                *final_aggregate_columns[i]);
        insertAggregatesIntoColumns(mapped, final_aggregate_columns);
    });

    destroyImpl<Method>(data);
}

template <typename Method, typename Table>
@ -1020,6 +1104,8 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal(

            for (size_t i = 0; i < params.aggregates_size; ++i)
                aggregate_columns[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]);

            data.getNullKeyData() = nullptr;
        }
    }

@ -1112,7 +1198,39 @@ Block Aggregator::prepareBlockAndFill(

    return res;
}
void Aggregator::fillAggregateColumnsWithSingleKey(
    AggregatedDataVariants & data_variants,
    MutableColumns & final_aggregate_columns)
{
    AggregatedDataWithoutKey & data = data_variants.without_key;

    for (size_t i = 0; i < params.aggregates_size; ++i)
    {
        ColumnAggregateFunction & column_aggregate_func = assert_cast<ColumnAggregateFunction &>(*final_aggregate_columns[i]);
        for (auto & pool : data_variants.aggregates_pools)
        {
            column_aggregate_func.addArena(pool);
        }
        column_aggregate_func.getData().push_back(data + offsets_of_aggregate_states[i]);
    }
    data = nullptr;
}

void Aggregator::createStatesAndFillKeyColumnsWithSingleKey(
    AggregatedDataVariants & data_variants,
    Columns & key_columns,
    size_t key_row,
    MutableColumns & final_key_columns)
{
    AggregateDataPtr place = data_variants.aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
    createAggregateStates(place);
    data_variants.without_key = place;

    for (size_t i = 0; i < params.keys_size; ++i)
    {
        final_key_columns[i]->insertFrom(*key_columns[i].get(), key_row);
    }
}

Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const
{
@ -1128,16 +1246,16 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va
    {
        AggregatedDataWithoutKey & data = data_variants.without_key;

        for (size_t i = 0; i < params.aggregates_size; ++i)
        {
            if (!final_)
                aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]);
            else
                aggregate_functions[i]->insertResultInto(data + offsets_of_aggregate_states[i], *final_aggregate_columns[i]);
        }

        if (!final_)
        {
            for (size_t i = 0; i < params.aggregates_size; ++i)
                aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]);
            data = nullptr;
        }
        else
        {
            insertAggregatesIntoColumns(data, final_aggregate_columns);
        }

        if (params.overflow_row)
            for (size_t i = 0; i < params.keys_size; ++i)
@ -2328,8 +2446,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const
            return;

        for (size_t i = 0; i < params.aggregates_size; ++i)
            if (!aggregate_functions[i]->isState())
                aggregate_functions[i]->destroy(data + offsets_of_aggregate_states[i]);
            aggregate_functions[i]->destroy(data + offsets_of_aggregate_states[i]);

        data = nullptr;
    });
@ -2343,8 +2460,7 @@ void Aggregator::destroyWithoutKey(AggregatedDataVariants & result) const
    if (nullptr != res_data)
    {
        for (size_t i = 0; i < params.aggregates_size; ++i)
            if (!aggregate_functions[i]->isState())
                aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]);
            aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]);

        res_data = nullptr;
    }

@ -1002,6 +1002,7 @@ protected:
    friend class MergingAndConvertingBlockInputStream;
    friend class ConvertingAggregatedToChunksTransform;
    friend class ConvertingAggregatedToChunksSource;
    friend class AggregatingInOrderTransform;

    Params params;

@ -1033,12 +1034,13 @@ protected:
    };

    using AggregateFunctionInstructions = std::vector<AggregateFunctionInstruction>;
    using NestedColumnsHolder = std::vector<std::vector<const IColumn *>>;

    Sizes offsets_of_aggregate_states; /// The offset to the n-th aggregate function in a row of aggregate functions.
    size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions.

    // add info to track alignment requirement
    // If there are states whose alignmentment are v1, ..vn, align_aggregate_states will be max(v1, ... vn)
    // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn)
    size_t align_aggregate_states = 1;

    bool all_aggregates_has_trivial_destructor = false;
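To make the alignment comment above concrete: the aggregate states of one row are laid out back to back, so the row's allocation must satisfy the strictest alignment among them, i.e. a running maximum over alignof. A tiny hypothetical illustration:

    #include <algorithm>
    #include <cstddef>

    struct StateA { double v; };   /// alignof == 8 on common ABIs
    struct StateB { char tag; };   /// alignof == 1

    int main()
    {
        size_t align_aggregate_states = 1;
        align_aggregate_states = std::max(align_aggregate_states, alignof(StateA));
        align_aggregate_states = std::max(align_aggregate_states, alignof(StateB));
        return align_aggregate_states == alignof(StateA) ? 0 : 1;  /// max(8, 1) == 8
    }
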
@ -1105,6 +1107,13 @@ protected:
        AggregateFunctionInstruction * aggregate_instructions,
        Arena * arena);

    static void executeOnIntervalWithoutKeyImpl(
        AggregatedDataWithoutKey & res,
        size_t row_begin,
        size_t row_end,
        AggregateFunctionInstruction * aggregate_instructions,
        Arena * arena);

    template <typename Method>
    void writeToTemporaryFileImpl(
        AggregatedDataVariants & data_variants,
@ -1157,6 +1166,11 @@ protected:
        MutableColumns & final_aggregate_columns,
        bool final) const;

    template <typename Mapped>
    void insertAggregatesIntoColumns(
        Mapped & mapped,
        MutableColumns & final_aggregate_columns) const;

    template <typename Method, typename Table>
    void convertToBlockImplFinal(
        Method & method,
@ -1250,6 +1264,22 @@ protected:
      * - sets the variable no_more_keys to true.
      */
    bool checkLimits(size_t result_size, bool & no_more_keys) const;

    void prepareAggregateInstructions(
        Columns columns,
        AggregateColumns & aggregate_columns,
        Columns & materialized_columns,
        AggregateFunctionInstructions & instructions,
        NestedColumnsHolder & nested_columns_holder);

    void fillAggregateColumnsWithSingleKey(
        AggregatedDataVariants & data_variants,
        MutableColumns & final_aggregate_columns);

    void createStatesAndFillKeyColumnsWithSingleKey(
        AggregatedDataVariants & data_variants,
        Columns & key_columns, size_t key_row,
        MutableColumns & final_key_columns);
};


@ -26,8 +26,8 @@ public:
    struct Data
    {
        TableJoin & analyzed_join;
        const TableWithColumnNames & left_table;
        const TableWithColumnNames & right_table;
        const TableWithColumnNamesAndTypes & left_table;
        const TableWithColumnNamesAndTypes & right_table;
        const Aliases & aliases;
        const bool is_asof{false};
        ASTPtr asof_left_key{};

@ -822,7 +822,11 @@ const Block & Context::getScalar(const String & name) const
{
    auto it = scalars.find(name);
    if (scalars.end() == it)
        throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::LOGICAL_ERROR);
    {
        // This should be a logical error, but it fails the sql_fuzz test too
        // often, so 'bad arguments' for now.
        throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::BAD_ARGUMENTS);
    }
    return it->second;
}


@ -45,34 +45,6 @@ struct DatabaseAndTableWithAlias
    }
};

struct TableWithColumnNames
{
    DatabaseAndTableWithAlias table;
    Names columns;
    Names hidden_columns; /// Not general columns like MATERIALIZED and ALIAS. They are omitted in * and t.* results.

    TableWithColumnNames(const DatabaseAndTableWithAlias & table_, const Names & columns_)
        : table(table_)
        , columns(columns_)
    {
        columns_set.insert(columns.begin(), columns.end());
    }

    TableWithColumnNames(const DatabaseAndTableWithAlias table_, Names && columns_, Names && hidden_columns_)
        : table(table_)
        , columns(columns_)
        , hidden_columns(hidden_columns_)
    {
        columns_set.insert(columns.begin(), columns.end());
        columns_set.insert(hidden_columns.begin(), hidden_columns.end());
    }

    bool hasColumn(const String & name) const { return columns_set.count(name); }

private:
    NameSet columns_set;
};

struct TableWithColumnNamesAndTypes
{
    DatabaseAndTableWithAlias table;
@ -96,21 +68,6 @@ struct TableWithColumnNamesAndTypes
        names.insert(col.name);
    }

    TableWithColumnNames removeTypes() const
    {
        Names out_columns;
        out_columns.reserve(columns.size());
        for (auto & col : columns)
            out_columns.push_back(col.name);

        Names out_hidden_columns;
        out_hidden_columns.reserve(hidden_columns.size());
        for (auto & col : hidden_columns)
            out_hidden_columns.push_back(col.name);

        return TableWithColumnNames(table, std::move(out_columns), std::move(out_hidden_columns));
    }

private:
    NameSet names;
};
@ -118,7 +75,6 @@ private:
std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);
std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);

using TablesWithColumnNames = std::vector<TableWithColumnNames>;
using TablesWithColumnNamesAndTypes = std::vector<TableWithColumnNames>;
using TablesWithColumns = std::vector<TableWithColumnNamesAndTypes>;

}

@ -726,7 +726,8 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
    return true;
}

bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types)
bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order,
                                                  ManyExpressionActions & group_by_elements_actions)
{
    const auto * select_query = getAggregatingQuery();

@ -743,6 +744,20 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
        getRootActions(ast, only_types, step.actions);
    }

    if (optimize_aggregation_in_order)
    {
        auto all_columns = sourceWithJoinedColumns();
        for (auto & child : asts)
        {
            group_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context));
            getRootActions(child, only_types, group_by_elements_actions.back());
        }
        // std::cerr << "group_by_elements_actions\n";
        // for (const auto & elem : group_by_elements_actions) {
        //     std::cerr << elem->dumpActions() << "\n";
        // }
    }

    return true;
}

@ -834,8 +849,11 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain
            order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context));
            getRootActions(child, only_types, order_by_elements_actions.back());
        }
        // std::cerr << "order_by_elements_actions\n";
        // for (const auto & elem : order_by_elements_actions) {
        //     std::cerr << elem->dumpActions() << "\n";
        // }
    }

    return true;
}

@ -1115,7 +1133,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(

    if (need_aggregate)
    {
        query_analyzer.appendGroupBy(chain, only_types || !first_stage);
        /// TODO correct conditions
        optimize_aggregation_in_order =
            context.getSettingsRef().optimize_aggregation_in_order
            && storage && query.groupBy();

        query_analyzer.appendGroupBy(chain, only_types || !first_stage, optimize_aggregation_in_order, group_by_elements_actions);
        query_analyzer.appendAggregateFunctionsArguments(chain, only_types || !first_stage);
        before_aggregation = chain.getLastActions();

@ -1128,13 +1151,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
        }
    }

    bool has_stream_with_non_joned_rows = (before_join && before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows());
    bool has_stream_with_non_joined_rows = (before_join && before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows());
    optimize_read_in_order =
        settings.optimize_read_in_order
        && storage && query.orderBy()
        && !query_analyzer.hasAggregation()
        && !query.final()
        && !has_stream_with_non_joned_rows;
        && !has_stream_with_non_joined_rows;

    /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
    query_analyzer.appendSelect(chain, only_types || (need_aggregate ? !second_stage : !first_stage));

@ -174,6 +174,7 @@ struct ExpressionAnalysisResult

    bool remove_where_filter = false;
    bool optimize_read_in_order = false;
    bool optimize_aggregation_in_order = false;

    ExpressionActionsPtr before_join; /// including JOIN
    ExpressionActionsPtr before_where;
@ -195,6 +196,7 @@ struct ExpressionAnalysisResult
    ConstantFilterDescription where_constant_filter_description;
    /// Actions by every element of ORDER BY
    ManyExpressionActions order_by_elements_actions;
    ManyExpressionActions group_by_elements_actions;

    ExpressionAnalysisResult() = default;

@ -303,7 +305,7 @@ private:
    /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
    bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);
    bool appendWhere(ExpressionActionsChain & chain, bool only_types);
    bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
    bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &);
    void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);

    /// After aggregation:

@ -38,10 +38,10 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr
{
    for (size_t index = 0; index < data.tables.size(); ++index)
    {
        const auto & columns = data.tables[index].columns;
        const auto & table = data.tables[index];

        // TODO: make sure no collision ever happens
        if (std::find(columns.begin(), columns.end(), identifier.name) != columns.end())
        if (table.hasColumn(identifier.name))
        {
            data.unique_reference_tables_pos.emplace(index);
            break;

@ -16,7 +16,7 @@ struct ExpressionInfoMatcher
    struct Data
    {
        const Context & context;
        const std::vector<TableWithColumnNames> & tables;
        const TablesWithColumns & tables;

        bool is_array_join = false;
        bool is_stateful_function = false;

@ -125,12 +125,6 @@ std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & iden
    return tryChooseTable<DatabaseAndTableWithAlias>(identifier, tables, ambiguous);
}

std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNames> & tables,
                                                      bool ambiguous)
{
    return tryChooseTable<TableWithColumnNames>(identifier, tables, ambiguous);
}

std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNamesAndTypes> & tables,
                                                      bool ambiguous)
{
@ -196,19 +190,14 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const
    return ColumnMatch::NoMatch;
}

IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier,
                                                                          const TableWithColumnNames & db_and_table)
{
    /// TODO: ColumnName match logic is disabled cause caller's code is not ready for it
    return canReferColumnToTable(identifier, db_and_table.table);
}

IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier,
                                                                          const TableWithColumnNamesAndTypes & db_and_table)
{
    ColumnMatch match = canReferColumnToTable(identifier, db_and_table.table);
#if 0
    if (match == ColumnMatch::NoMatch && identifier.isShort() && db_and_table.hasColumn(identifier.shortName()))
        match = ColumnMatch::ColumnName;
#endif
    return match;
}


@ -41,7 +41,6 @@ struct IdentifierSemantic
    static std::optional<String> extractNestedName(const ASTIdentifier & identifier, const String & table_name);

    static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
    static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNames & db_and_table);
    static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & db_and_table);

    static void setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
@ -53,8 +52,6 @@ struct IdentifierSemantic
    static std::optional<size_t> getMembership(const ASTIdentifier & identifier);
    static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<DatabaseAndTableWithAlias> & tables,
                                             bool allow_ambiguous = false);
    static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNames> & tables,
                                             bool allow_ambiguous = false);
    static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNamesAndTypes> & tables,
                                             bool allow_ambiguous = false);


@ -209,10 +209,7 @@ BlockIO InterpreterInsertQuery::execute()
        if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
            out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams());

        if (out_streams_size == 1)
            res.pipeline.addPipe({std::make_shared<ConcatProcessor>(res.pipeline.getHeader(), res.pipeline.getNumStreams())});
        else
            res.pipeline.resize(out_streams_size);
        res.pipeline.resize(out_streams_size);
    }
    else if (query.watch)
    {

@ -71,6 +71,8 @@
#include <Processors/Pipe.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/ConvertingTransform.h>
#include <Processors/Transforms/AggregatingInOrderTransform.h>
#include <Processors/Merges/AggregatingSortedTransform.h>


namespace DB
@ -601,6 +603,20 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co
    return order_descr;
}

static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
{
    SortDescription order_descr;
    order_descr.reserve(query.groupBy()->children.size());

    for (const auto & elem : query.groupBy()->children)
    {
        String name = elem->getColumnName();
        order_descr.emplace_back(name, 1, 1);
    }

    return order_descr;
}

static UInt64 getLimitUIntValue(const ASTPtr & node, const Context & context, const std::string & expr)
{
    const auto & [field, type] = evaluateConstantExpression(node, context);
@ -648,8 +664,8 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn
{
    /** Streams of data. When the query is executed in parallel, we have several data streams.
      * If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then
      * if there is an ORDER BY, then glue the streams using UnionBlockInputStream, and then MergeSortingBlockInputStream,
      * if not, then glue it using UnionBlockInputStream,
      * if there is an ORDER BY, then glue the streams using ResizeProcessor, and then MergeSorting transforms,
      * if not, then glue it using ResizeProcessor,
      * then apply LIMIT.
      * If there is GROUP BY, then we will perform all operations up to GROUP BY, inclusive, in parallel;
      * a parallel GROUP BY will glue streams into one,
@ -739,7 +755,7 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn
        if (!expressions.second_stage && !expressions.need_aggregate && !expressions.hasHaving())
        {
            if (expressions.has_order_by)
                executeOrder(pipeline, query_info.input_sorting_info);
                executeOrder(pipeline, query_info.input_order_info);

            if (expressions.has_order_by && query.limitLength())
                executeDistinct(pipeline, false, expressions.selected_columns);
@ -832,7 +848,11 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn
                executeWhere(pipeline, expressions.before_where, expressions.remove_where_filter);

            if (expressions.need_aggregate)
                executeAggregation(pipeline, expressions.before_aggregation, aggregate_overflow_row, aggregate_final);
            {
                executeAggregation(pipeline, expressions.before_aggregation, aggregate_overflow_row, aggregate_final, query_info.input_order_info);
                /// We need to reset input order info, so that executeOrder can't use it
                query_info.input_order_info.reset();
            }
            else
            {
                executeExpression(pipeline, expressions.before_order_and_select);
@ -898,7 +918,7 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn
                if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final))
                    executeMergeSorted(pipeline);
                else /// Otherwise, just sort.
                    executeOrder(pipeline, query_info.input_sorting_info);
                    executeOrder(pipeline, query_info.input_order_info);
            }

            /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT,
@ -1262,15 +1282,21 @@ void InterpreterSelectQuery::executeFetchColumns(
        query_info.prewhere_info = prewhere_info;

        /// Create optimizer with prepared actions.
        /// Maybe we will need to calc input_sorting_info later, e.g. while reading from StorageMerge.
        if (analysis_result.optimize_read_in_order)
        /// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.
        if (analysis_result.optimize_read_in_order || analysis_result.optimize_aggregation_in_order)
        {
            query_info.order_by_optimizer = std::make_shared<ReadInOrderOptimizer>(
                analysis_result.order_by_elements_actions,
                getSortDescription(query, *context),
                query_info.syntax_analyzer_result);
            if (analysis_result.optimize_read_in_order)
                query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
                    analysis_result.order_by_elements_actions,
                    getSortDescription(query, *context),
                    query_info.syntax_analyzer_result);
            else
                query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
                    analysis_result.group_by_elements_actions,
                    getSortDescriptionFromGroupBy(query),
                    query_info.syntax_analyzer_result);

            query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage);
            query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage);
        }

        Pipes pipes = storage->read(required_columns, query_info, *context, processing_stage, max_block_size, max_streams);
@ -1376,7 +1402,7 @@ void InterpreterSelectQuery::executeWhere(QueryPipeline & pipeline, const Expres
}


void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info)
{
    pipeline.addSimpleTransform([&](const Block & header)
    {
@ -1414,6 +1440,62 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const
    /// Forget about current totals and extremes. They will be calculated again after aggregation if needed.
    pipeline.dropTotalsAndExtremes();

    if (group_by_info && settings.optimize_aggregation_in_order)
    {
        auto & query = getSelectQuery();
        SortDescription group_by_descr = getSortDescriptionFromGroupBy(query);
        bool need_finish_sorting = (group_by_info->order_key_prefix_descr.size() < group_by_descr.size());

        if (need_finish_sorting)
        {
            /// TOO SLOW
        }
        else
        {
            if (pipeline.getNumStreams() > 1)
            {
                auto many_data = std::make_shared<ManyAggregatedData>(pipeline.getNumStreams());
                size_t counter = 0;
                pipeline.addSimpleTransform([&](const Block & header)
                {
                    return std::make_shared<AggregatingInOrderTransform>(header, transform_params, group_by_descr, settings.max_block_size, many_data, counter++);
                });

                for (auto & column_description : group_by_descr)
                {
                    if (!column_description.column_name.empty())
                    {
                        column_description.column_number = pipeline.getHeader().getPositionByName(column_description.column_name);
                        column_description.column_name.clear();
                    }
                }

                auto transform = std::make_shared<AggregatingSortedTransform>(
                    pipeline.getHeader(),
                    pipeline.getNumStreams(),
                    group_by_descr,
                    settings.max_block_size);

                pipeline.addPipe({ std::move(transform) });
            }
            else
            {
                pipeline.addSimpleTransform([&](const Block & header)
                {
                    return std::make_shared<AggregatingInOrderTransform>(header, transform_params, group_by_descr, settings.max_block_size);
                });
            }

            pipeline.addSimpleTransform([&](const Block & header)
            {
                return std::make_shared<FinalizingSimpleTransform>(header, transform_params);
            });

            pipeline.enableQuotaForCurrentStreams();
            return;
        }
    }

    /// If there are several sources, then we perform parallel aggregation
    if (pipeline.getNumStreams() > 1)
    {
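The branch above wires AggregatingInOrderTransform into the pipeline when the input already arrives sorted by the GROUP BY key: each stream can then finalize a group the moment the key changes, instead of hashing every row, and a final AggregatingSortedTransform merges the per-stream results. A simplified single-stream sketch of the core idea, with hypothetical types (the real transform operates on blocks of columns):

    #include <cassert>
    #include <utility>
    #include <vector>

    /// Input must already be sorted by key, as the in-order path assumes.
    static std::vector<std::pair<int, long>> aggregateSorted(const std::vector<std::pair<int, long>> & rows)
    {
        std::vector<std::pair<int, long>> result;
        for (const auto & [key, value] : rows)
        {
            if (result.empty() || result.back().first != key)
                result.emplace_back(key, 0);  /// key changed: previous group is final
            result.back().second += value;
        }
        return result;
    }

    int main()
    {
        std::vector<std::pair<int, long>> rows = {{1, 10}, {1, 5}, {2, 7}, {3, 1}, {3, 2}};
        auto grouped = aggregateSorted(rows);
        assert((grouped == std::vector<std::pair<int, long>>{{1, 15}, {2, 7}, {3, 3}}));
        return 0;
    }
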
@ -1576,7 +1658,47 @@ void InterpreterSelectQuery::executeExpression(QueryPipeline & pipeline, const E
}


void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSortingInfoPtr input_sorting_info)
void InterpreterSelectQuery::executeOrderOptimized(QueryPipeline & pipeline, InputOrderInfoPtr input_sorting_info, UInt64 limit, SortDescription & output_order_descr)
{
    const Settings & settings = context->getSettingsRef();

    bool need_finish_sorting = (input_sorting_info->order_key_prefix_descr.size() < output_order_descr.size());
    if (pipeline.getNumStreams() > 1)
    {
        UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit);
        auto transform = std::make_shared<MergingSortedTransform>(
            pipeline.getHeader(),
            pipeline.getNumStreams(),
            input_sorting_info->order_key_prefix_descr,
            settings.max_block_size, limit_for_merging);

        pipeline.addPipe({ std::move(transform) });
    }

    pipeline.enableQuotaForCurrentStreams();

    if (need_finish_sorting)
    {
        pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
        {
            if (stream_type != QueryPipeline::StreamType::Main)
                return nullptr;

            return std::make_shared<PartialSortingTransform>(header, output_order_descr, limit);
        });

        /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform

        pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
        {
            return std::make_shared<FinishSortingTransform>(
                header, input_sorting_info->order_key_prefix_descr,
                output_order_descr, settings.max_block_size, limit);
        });
    }
}

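executeOrderOptimized above exploits a partial order: when the input is already sorted by a prefix of the requested sort key (order_key_prefix_descr), only runs of rows that share the same prefix values still need sorting by the remaining columns; this is the job FinishSortingTransform performs per stream. A minimal sketch of that idea with hypothetical row types (prefix key in .first, remaining key in .second):

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    static void finishSorting(std::vector<std::pair<int, int>> & rows)
    {
        size_t run_begin = 0;
        for (size_t i = 1; i <= rows.size(); ++i)
        {
            /// A run ends when the prefix key changes or the input is exhausted.
            if (i == rows.size() || rows[i].first != rows[run_begin].first)
            {
                std::sort(rows.begin() + run_begin, rows.begin() + i,
                          [](const auto & a, const auto & b) { return a.second < b.second; });
                run_begin = i;
            }
        }
    }

    int main()
    {
        /// Already sorted by .first; .second still unsorted within each run.
        std::vector<std::pair<int, int>> rows = {{1, 3}, {1, 1}, {2, 2}, {2, 0}};
        finishSorting(rows);
        assert((rows == std::vector<std::pair<int, int>>{{1, 1}, {1, 3}, {2, 0}, {2, 2}}));
        return 0;
    }
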
void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputOrderInfoPtr input_sorting_info)
{
    auto & query = getSelectQuery();
    SortDescription output_order_descr = getSortDescription(query, *context);
@ -1596,43 +1718,7 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting
         * and then merge them into one sorted stream.
         * At this stage we merge per-thread streams into one.
         */

        bool need_finish_sorting = (input_sorting_info->order_key_prefix_descr.size() < output_order_descr.size());

        if (pipeline.getNumStreams() > 1)
        {
            UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit);
            auto transform = std::make_shared<MergingSortedTransform>(
                pipeline.getHeader(),
                pipeline.getNumStreams(),
                input_sorting_info->order_key_prefix_descr,
                settings.max_block_size, limit_for_merging);

            pipeline.addPipe({ std::move(transform) });
        }

        pipeline.enableQuotaForCurrentStreams();

        if (need_finish_sorting)
        {
            pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
            {
                if (stream_type != QueryPipeline::StreamType::Main)
                    return nullptr;

                return std::make_shared<PartialSortingTransform>(header, output_order_descr, limit);
            });

            /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform

            pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
            {
                return std::make_shared<FinishSortingTransform>(
                    header, input_sorting_info->order_key_prefix_descr,
                    output_order_descr, settings.max_block_size, limit);
            });
        }

        executeOrderOptimized(pipeline, input_sorting_info, limit, output_order_descr);
        return;
    }

@ -1905,8 +1991,8 @@ void InterpreterSelectQuery::executeExtremes(QueryPipeline & pipeline)

void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, const SubqueriesForSets & subqueries_for_sets)
{
    if (query_info.input_sorting_info)
        executeMergeSorted(pipeline, query_info.input_sorting_info->order_key_prefix_descr, 0);
    if (query_info.input_order_info)
        executeMergeSorted(pipeline, query_info.input_order_info->order_key_prefix_descr, 0);

    const Settings & settings = context->getSettingsRef();


@ -113,12 +113,13 @@ private:
        const Names & columns_to_remove_after_prewhere);

    void executeWhere(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter);
    void executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
    void executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info);
    void executeMergeAggregated(QueryPipeline & pipeline, bool overflow_row, bool final);
    void executeTotalsAndHaving(QueryPipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
    void executeHaving(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
    static void executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
    void executeOrder(QueryPipeline & pipeline, InputSortingInfoPtr sorting_info);
    void executeOrder(QueryPipeline & pipeline, InputOrderInfoPtr sorting_info);
    void executeOrderOptimized(QueryPipeline & pipeline, InputOrderInfoPtr sorting_info, UInt64 limit, SortDescription & output_order_descr);
    void executeWithFill(QueryPipeline & pipeline);
    void executeMergeSorted(QueryPipeline & pipeline);
    void executePreLimit(QueryPipeline & pipeline, bool do_not_skip_offset);


@ -204,7 +204,6 @@ BlockIO InterpreterSystemQuery::execute()
        case Type::DROP_DNS_CACHE:
            context.checkAccess(AccessType::SYSTEM_DROP_DNS_CACHE);
            DNSResolver::instance().dropCache();
            AllowedClientHosts::dropDNSCaches();
            /// Reinitialize clusters to update their resolved_addresses
            system_context.reloadClusterConfig();
            break;


@ -18,14 +18,17 @@ namespace ErrorCodes
}

PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
    const Context & context_, const TablesWithColumnNames & tables_with_columns_, const Settings & settings_)
    : context(context_), tables_with_columns(tables_with_columns_), settings(settings_)
    const Context & context_, const TablesWithColumns & tables_with_columns_, const Settings & settings)
    : enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression)
    , enable_optimize_predicate_expression_to_final_subquery(settings.enable_optimize_predicate_expression_to_final_subquery)
    , context(context_)
    , tables_with_columns(tables_with_columns_)
{
}

bool PredicateExpressionsOptimizer::optimize(ASTSelectQuery & select_query)
{
    if (!settings.enable_optimize_predicate_expression)
    if (!enable_optimize_predicate_expression)
        return false;

    if (select_query.having() && (!select_query.group_by_with_cube && !select_query.group_by_with_rollup && !select_query.group_by_with_totals))
@ -133,7 +136,7 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e
            break; /// Skip left and right table optimization

        is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos],
            tables_with_columns[table_pos].columns);
            tables_with_columns[table_pos].columns.getNames());

        if (table_element->table_join && isRight(table_element->table_join->as<ASTTableJoin>()->kind))
            break; /// Skip left table optimization
@ -143,12 +146,12 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e
    return is_rewrite_tables;
}

bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, const Names & table_column) const
bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, Names && table_columns) const
{
    if (!table_predicates.empty())
    {
        auto optimize_final = settings.enable_optimize_predicate_expression_to_final_subquery;
        PredicateRewriteVisitor::Data data(context, table_predicates, table_column, optimize_final);
        auto optimize_final = enable_optimize_predicate_expression_to_final_subquery;
        PredicateRewriteVisitor::Data data(context, table_predicates, std::move(table_columns), optimize_final);

        PredicateRewriteVisitor(data).visit(table_element);
        return data.is_rewrite;

@ -18,34 +18,21 @@ struct Settings;
class PredicateExpressionsOptimizer
{
public:
    PredicateExpressionsOptimizer(const Context & context_, const TablesWithColumnNames & tables_with_columns_, const Settings & settings_);
    PredicateExpressionsOptimizer(const Context & context_, const TablesWithColumns & tables_with_columns_, const Settings & settings_);

    bool optimize(ASTSelectQuery & select_query);

private:
    /// Extracts settings, mostly to show which are used and which are not.
    struct ExtractedSettings
    {
        const bool enable_optimize_predicate_expression;
        const bool enable_optimize_predicate_expression_to_final_subquery;

        template<typename T>
        ExtractedSettings(const T & settings_)
            : enable_optimize_predicate_expression(settings_.enable_optimize_predicate_expression),
              enable_optimize_predicate_expression_to_final_subquery(settings_.enable_optimize_predicate_expression_to_final_subquery)
        {}
    };

    const bool enable_optimize_predicate_expression;
    const bool enable_optimize_predicate_expression_to_final_subquery;
    const Context & context;
    const std::vector<TableWithColumnNames> & tables_with_columns;

    const ExtractedSettings settings;
    const TablesWithColumns & tables_with_columns;

    std::vector<ASTs> extractTablesPredicates(const ASTPtr & where, const ASTPtr & prewhere);

    bool tryRewritePredicatesToTables(ASTs & tables_element, const std::vector<ASTs> & tables_predicates);

    bool tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, const Names & table_column) const;
    bool tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, Names && table_columns) const;

    bool tryMovePredicatesFromHavingToWhere(ASTSelectQuery & select_query);
};


@ -17,7 +17,7 @@ namespace DB
{

PredicateRewriteVisitorData::PredicateRewriteVisitorData(
    const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_)
    const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_)
    : context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_)
{
}

@ -24,12 +24,12 @@ public:
        return true;
    }

    PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_);
    PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_);

private:
    const Context & context;
    const ASTs & predicates;
    const Names & column_names;
    const Names column_names;
    bool optimize_final;

    void visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &);
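One detail worth spelling out in the hunk above: column_names changes from a reference member (const Names &) to an owning copy (const Names), matching the constructor's switch to Names &&. A reference member bound to a temporary argument would dangle as soon as the full expression ends. A hypothetical minimal illustration of the safe owning variant:

    #include <string>
    #include <utility>
    #include <vector>

    using Names = std::vector<std::string>;

    struct Holder
    {
        const Names column_names;  /// owning copy: safe even for temporary arguments
        explicit Holder(Names && column_names_) : column_names(std::move(column_names_)) {}
    };

    int main()
    {
        /// Binding a reference member to this temporary would leave it dangling.
        Holder holder(Names{"id", "value"});
        return holder.column_names.size() == 2 ? 0 : 1;
    }
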
@ -102,7 +102,7 @@ using CustomizeGlobalNotInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeF
|
||||
/// Expand asterisks and qualified asterisks with column names.
|
||||
/// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer.
|
||||
void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query, const NameSet & source_columns_set,
|
||||
const std::vector<TableWithColumnNames> & tables_with_columns)
|
||||
const TablesWithColumns & tables_with_columns)
|
||||
{
|
||||
LogAST log;
|
||||
TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns);
|
||||
@ -528,7 +528,7 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul
|
||||
|
||||
/// Find the columns that are obtained by JOIN.
|
||||
void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & select_query,
|
||||
const std::vector<TableWithColumnNames> & tables, const Aliases & aliases)
|
||||
const TablesWithColumns & tables, const Aliases & aliases)
|
||||
{
|
||||
const ASTTablesInSelectQueryElement * node = select_query.join();
|
||||
if (!node)
|
||||
@ -793,12 +793,6 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
if (remove_duplicates)
|
||||
renameDuplicatedColumns(select_query);
|
||||
|
||||
/// TODO: Remove unneeded conversion
|
||||
std::vector<TableWithColumnNames> tables_with_column_names;
|
||||
tables_with_column_names.reserve(tables_with_columns.size());
|
||||
for (const auto & table : tables_with_columns)
|
||||
tables_with_column_names.emplace_back(table.removeTypes());
|
||||
|
||||
if (tables_with_columns.size() > 1)
|
||||
{
|
||||
result.analyzed_join->columns_from_joined_table = tables_with_columns[1].columns;
|
||||
@ -806,7 +800,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
source_columns_set, tables_with_columns[1].table.getQualifiedNamePrefix());
|
||||
}
|
||||
|
||||
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_column_names);
|
||||
translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns);
|
||||
|
||||
/// Optimizes logical expressions.
|
||||
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
|
||||
@ -825,10 +819,11 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
{
|
||||
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_miltiif);
|
||||
|
||||
optimizeArithmeticOperationsInAgr(query, settings.optimize_arithmetic_operations_in_agr_func);
|
||||
/// Move arithmetic operations out of aggregation functions
|
||||
optimizeArithmeticOperationsInAgr(query, settings.optimize_arithmetic_operations_in_aggregate_functions);
|
||||
|
||||
/// Push the predicate expression down to the subqueries.
|
||||
result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_column_names, settings).optimize(*select_query);
|
||||
result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query);
|
||||
|
||||
/// GROUP BY injective function elimination.
|
||||
optimizeGroupBy(select_query, source_columns_set, context);
|
||||
@ -847,7 +842,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
|
||||
setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys,
|
||||
result.analyzed_join->table_join);
|
||||
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_column_names, result.aliases);
|
||||
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
|
||||
}
|
||||
|
||||
result.aggregates = getAggregates(query, *select_query);
|
||||
@ -857,7 +852,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
result.optimize_trivial_count = settings.optimize_trivial_count_query &&
|
||||
!select_query->where() && !select_query->prewhere() && !select_query->groupBy() && !select_query->having() &&
|
||||
!select_query->sampleSize() && !select_query->sampleOffset() && !select_query->final() &&
|
||||
(tables_with_column_names.size() < 2 || isLeft(result.analyzed_join->kind()));
|
||||
(tables_with_columns.size() < 2 || isLeft(result.analyzed_join->kind()));
|
||||
|
||||
return std::make_shared<const SyntaxAnalyzerResult>(result);
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Common/QueryProfiler.h>
|
||||
#include <Common/ThreadProfileEvents.h>
|
||||
#include <Common/TraceCollector.h>
|
||||
#include <common/errnoToString.h>
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
# include <Common/hasLinuxCapability.h>
|
||||
@ -134,6 +135,54 @@ void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool
|
||||
setupState(thread_group_);
|
||||
}
|
||||
|
||||
void ThreadStatus::initPerformanceCounters()
|
||||
{
|
||||
performance_counters_finalized = false;
|
||||
|
||||
/// Clear stats from previous query if a new query is started
|
||||
/// TODO: make separate query_thread_performance_counters and thread_performance_counters
|
||||
performance_counters.resetCounters();
|
||||
memory_tracker.resetCounters();
|
||||
memory_tracker.setDescription("(for thread)");
|
||||
|
||||
query_start_time_nanoseconds = getCurrentTimeNanoseconds();
|
||||
query_start_time = time(nullptr);
|
||||
++queries_started;
|
||||
|
||||
*last_rusage = RUsageCounters::current(query_start_time_nanoseconds);
|
||||
|
||||
if (query_context)
|
||||
{
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
if (settings.metrics_perf_events_enabled)
|
||||
{
|
||||
try
|
||||
{
|
||||
current_thread_counters.initializeProfileEvents(
|
||||
settings.metrics_perf_events_list);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!taskstats)
|
||||
{
|
||||
try
|
||||
{
|
||||
taskstats = TasksStatsCounters::create(thread_id);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
if (taskstats)
|
||||
taskstats->reset();
|
||||
}
|
||||
|
||||
void ThreadStatus::finalizePerformanceCounters()
|
||||
{
|
||||
if (performance_counters_finalized)
|
||||
@ -142,6 +191,21 @@ void ThreadStatus::finalizePerformanceCounters()
|
||||
performance_counters_finalized = true;
|
||||
updatePerformanceCounters();
|
||||
|
||||
bool close_perf_descriptors = true;
|
||||
if (query_context)
|
||||
close_perf_descriptors = !query_context->getSettingsRef().metrics_perf_events_enabled;
|
||||
|
||||
try
|
||||
{
|
||||
current_thread_counters.finalizeProfileEvents(performance_counters);
|
||||
if (close_perf_descriptors)
|
||||
current_thread_counters.closeEventDescriptors();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (global_context && query_context)
|
||||
|
@ -37,9 +37,10 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const
|
||||
auto nested2 = IdentifierSemantic::extractNestedName(identifier, table.alias);
|
||||
|
||||
const String & short_name = identifier.shortName();
|
||||
const Names & column_names = tables[table_pos].columns;
|
||||
for (const auto & known_name : column_names)
|
||||
const auto & columns = tables[table_pos].columns;
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
const String & known_name = column.name;
|
||||
if (short_name == known_name)
|
||||
return false;
|
||||
if (nested1 && *nested1 == known_name)
|
||||
@ -48,9 +49,10 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const
|
||||
return false;
|
||||
}
|
||||
|
||||
const Names & hidden_names = tables[table_pos].hidden_columns;
|
||||
for (const auto & known_name : hidden_names)
|
||||
const auto & hidden_columns = tables[table_pos].hidden_columns;
|
||||
for (const auto & column : hidden_columns)
|
||||
{
|
||||
const String & known_name = column.name;
|
||||
if (short_name == known_name)
|
||||
return false;
|
||||
if (nested1 && *nested1 == known_name)
|
||||
@ -59,7 +61,7 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const
|
||||
return false;
|
||||
}
|
||||
|
||||
return !column_names.empty();
|
||||
return !columns.empty();
|
||||
}
|
||||
|
||||
bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
|
||||
@@ -232,11 +234,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
    bool first_table = true;
    for (const auto & table : tables_with_columns)
    {
        for (const auto & column_name : table.columns)
        for (const auto & column : table.columns)
        {
            if (first_table || !data.join_using_columns.count(column_name))
            if (first_table || !data.join_using_columns.count(column.name))
            {
                addIdentifier(node.children, table.table, column_name, AsteriskSemantic::getAliases(*asterisk));
                addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk));
            }
        }

@@ -248,11 +250,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
    bool first_table = true;
    for (const auto & table : tables_with_columns)
    {
        for (const auto & column_name : table.columns)
        for (const auto & column : table.columns)
        {
            if (asterisk_pattern->isColumnMatching(column_name) && (first_table || !data.join_using_columns.count(column_name)))
            if (asterisk_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name)))
            {
                addIdentifier(node.children, table.table, column_name, AsteriskSemantic::getAliases(*asterisk_pattern));
                addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk_pattern));
            }
        }

@@ -267,9 +269,9 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
    {
        if (ident_db_and_name.satisfies(table.table, true))
        {
            for (const auto & column_name : table.columns)
            for (const auto & column : table.columns)
            {
                addIdentifier(node.children, table.table, column_name, AsteriskSemantic::getAliases(*qualified_asterisk));
                addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*qualified_asterisk));
            }
            break;
        }
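All three hunks above apply the same rule when expanding an asterisk: every matched table contributes its columns, but for the second and later tables the columns already merged by JOIN ... USING are emitted only once. A hedged sketch of that filter with simplified containers (names here are illustrative):

#include <string>
#include <unordered_set>
#include <vector>

struct Table { std::vector<std::string> columns; };

// Expand `*` over joined tables, emitting USING-columns only once
// (from the first table), as the loops above do via join_using_columns.
std::vector<std::string> expandAsterisk(
    const std::vector<Table> & tables,
    const std::unordered_set<std::string> & join_using_columns)
{
    std::vector<std::string> result;
    bool first_table = true;
    for (const auto & table : tables)
    {
        for (const auto & name : table.columns)
            if (first_table || !join_using_columns.count(name))
                result.push_back(name);
        first_table = false;
    }
    return result;
}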
@@ -25,11 +25,11 @@ public:
    struct Data
    {
        const NameSet source_columns;
        const std::vector<TableWithColumnNames> & tables;
        const TablesWithColumns & tables;
        std::unordered_set<String> join_using_columns;
        bool has_columns;

        Data(const NameSet & source_columns_, const std::vector<TableWithColumnNames> & tables_, bool has_columns_ = true)
        Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true)
            : source_columns(source_columns_)
            , tables(tables_)
            , has_columns(has_columns_)
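TablesWithColumns, which replaces the spelled-out vector type here and in the hunks below, is presumably an alias along these lines (an assumption based on this diff, not a quote of the actual header):

#include <vector>

struct TableWithColumnNamesAndTypes; // real definition lives elsewhere in Interpreters/

/// Assumed shape of the alias this commit switches Data::tables to.
using TablesWithColumns = std::vector<TableWithColumnNamesAndTypes>;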
@@ -115,10 +115,9 @@ NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table
    return getColumnsFromTableExpression(table_expression, context, materialized, aliases, virtuals);
}

std::vector<TableWithColumnNamesAndTypes> getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions,
    const Context & context)
TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions, const Context & context)
{
    std::vector<TableWithColumnNamesAndTypes> tables_with_columns;
    TablesWithColumns tables_with_columns;

    if (!table_expressions.empty())
    {
@@ -146,15 +145,4 @@ std::vector<TableWithColumnNamesAndTypes> getDatabaseAndTablesWithColumns(const
    return tables_with_columns;
}

std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const std::vector<const ASTTableExpression *> & table_expressions,
    const Context & context)
{
    std::vector<TableWithColumnNamesAndTypes> tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context);
    std::vector<TableWithColumnNames> out;
    out.reserve(tables_with_columns.size());
    for (auto & table : tables_with_columns)
        out.emplace_back(table.removeTypes());
    return out;
}

}
@@ -17,9 +17,6 @@ const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, siz
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);

NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, const Context & context);
std::vector<TableWithColumnNamesAndTypes> getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions,
    const Context & context);
std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const std::vector<const ASTTableExpression *> & table_expressions,
    const Context & context);
TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector<const ASTTableExpression *> & table_expressions, const Context & context);

}
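The header change above collapses two entry points into one: the name-only variant getDatabaseAndTablesWithColumnNames() is removed, and callers that only need names now read them off the typed result. A standalone sketch of what that looks like on the caller side (simplified local types; columnNames() is a hypothetical helper, not ClickHouse API):

#include <string>
#include <vector>

struct Column { std::string name; std::string type; };
struct TableWithColumns { std::string table; std::vector<Column> columns; };
using TablesWithColumns = std::vector<TableWithColumns>;

// After the refactoring there is one typed entry point; a caller that only
// needs names reads column.name instead of calling a second overload that
// stripped the types.
std::vector<std::string> columnNames(const TablesWithColumns & tables)
{
    std::vector<std::string> names;
    for (const auto & table : tables)
        for (const auto & column : table.columns)
            names.push_back(column.name);
    return names;
}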
@@ -1,15 +1,3 @@
add_executable (expression expression.cpp)
target_link_libraries (expression PRIVATE dbms clickhouse_parsers)

add_executable (create_query create_query.cpp)
target_link_libraries (create_query PRIVATE dbms clickhouse_parsers)

add_executable (select_query select_query.cpp)
target_link_libraries (select_query PRIVATE clickhouse_storages_system dbms clickhouse_common_io)

add_executable (aggregate aggregate.cpp)
target_link_libraries (aggregate PRIVATE dbms)

add_executable (hash_map hash_map.cpp)
target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
target_link_libraries (hash_map PRIVATE dbms)
@@ -1,105 +0,0 @@
#include <iostream>
#include <iomanip>

#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>

#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>

#include <DataStreams/OneBlockInputStream.h>

#include <Interpreters/Aggregator.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>


int main(int argc, char ** argv)
{
    using namespace DB;

    try
    {
        size_t n = argc == 2 ? std::stol(argv[1]) : 10;

        Block block;

        {
            ColumnWithTypeAndName column;
            column.name = "x";
            column.type = std::make_shared<DataTypeInt16>();
            auto col = ColumnInt16::create();
            auto & vec_x = col->getData();

            vec_x.resize(n);
            for (size_t i = 0; i < n; ++i)
                vec_x[i] = i % 9;

            column.column = std::move(col);
            block.insert(column);
        }

        const char * strings[] = {"abc", "def", "abcd", "defg", "ac"};

        {
            ColumnWithTypeAndName column;
            column.name = "s1";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 5]));

            column.column = std::move(col);
            block.insert(column);
        }

        {
            ColumnWithTypeAndName column;
            column.name = "s2";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 3]));

            column.column = std::move(col);
            block.insert(column);
        }

        BlockInputStreamPtr stream = std::make_shared<OneBlockInputStream>(block);
        AggregatedDataVariants aggregated_data_variants;

        AggregateFunctionFactory factory;

        AggregateDescriptions aggregate_descriptions(1);

        DataTypes empty_list_of_types;
        aggregate_descriptions[0].function = factory.get("count", empty_list_of_types);

        Aggregator::Params params(
            stream->getHeader(), {0, 1}, aggregate_descriptions,
            false, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, 1, 0);

        Aggregator aggregator(params);

        {
            Stopwatch stopwatch;
            stopwatch.start();

            aggregator.execute(stream, aggregated_data_variants);

            stopwatch.stop();
            std::cout << std::fixed << std::setprecision(2)
                << "Elapsed " << stopwatch.elapsedSeconds() << " sec."
                << ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
                << std::endl;
        }
    }
    catch (const Exception & e)
    {
        std::cerr << e.displayText() << std::endl;
    }

    return 0;
}
@@ -1,103 +0,0 @@
#include <iostream>
#include <iomanip>

#include <Parsers/ParserCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>

#include <Databases/DatabaseOrdinary.h>

#include <Interpreters/Context.h>
#include <Interpreters/InterpreterCreateQuery.h>


using namespace DB;

int main(int, char **)
try
{
    std::string input = "CREATE TABLE IF NOT EXISTS hits (\n"
        "WatchID UInt64,\n"
        "JavaEnable UInt8,\n"
        "Title String,\n"
        "EventTime DateTime,\n"
        "CounterID UInt32,\n"
        "ClientIP UInt32,\n"
        "RegionID UInt32,\n"
        "UniqID UInt64,\n"
        "CounterClass UInt8,\n"
        "OS UInt8,\n"
        "UserAgent UInt8,\n"
        "URL String,\n"
        "Referer String,\n"
        "ResolutionWidth UInt16,\n"
        "ResolutionHeight UInt16,\n"
        "ResolutionDepth UInt8,\n"
        "FlashMajor UInt8,\n"
        "FlashMinor UInt8,\n"
        "FlashMinor2 String,\n"
        "NetMajor UInt8,\n"
        "NetMinor UInt8,\n"
        "UserAgentMajor UInt16,\n"
        "UserAgentMinor FixedString(2),\n"
        "CookieEnable UInt8,\n"
        "JavascriptEnable UInt8,\n"
        "IsMobile UInt8,\n"
        "MobilePhone UInt8,\n"
        "MobilePhoneModel String,\n"
        "Params String,\n"
        "IPNetworkID UInt32,\n"
        "TraficSourceID Int8,\n"
        "SearchEngineID UInt16,\n"
        "SearchPhrase String,\n"
        "AdvEngineID UInt8,\n"
        "IsArtifical UInt8,\n"
        "WindowClientWidth UInt16,\n"
        "WindowClientHeight UInt16,\n"
        "ClientTimeZone Int16,\n"
        "ClientEventTime DateTime,\n"
        "SilverlightVersion1 UInt8,\n"
        "SilverlightVersion2 UInt8,\n"
        "SilverlightVersion3 UInt32,\n"
        "SilverlightVersion4 UInt16,\n"
        "PageCharset String,\n"
        "CodeVersion UInt32,\n"
        "IsLink UInt8,\n"
        "IsDownload UInt8,\n"
        "IsNotBounce UInt8,\n"
        "FUniqID UInt64,\n"
        "OriginalURL String,\n"
        "HID UInt32,\n"
        "IsOldCounter UInt8,\n"
        "IsEvent UInt8,\n"
        "IsParameter UInt8,\n"
        "DontCountHits UInt8,\n"
        "WithHash UInt8\n"
        ") ENGINE = Log";

    ParserCreateQuery parser;
    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);

    SharedContextHolder shared_context = Context::createShared();
    Context context = Context::createGlobal(shared_context.get());
    context.makeGlobalContext();

    context.setPath("./");
    auto database = std::make_shared<DatabaseOrdinary>("test", "./metadata/test/", context);
    DatabaseCatalog::instance().attachDatabase("test", database);
    database->loadStoredObjects(context, false);
    context.setCurrentDatabase("test");

    InterpreterCreateQuery interpreter(ast, context);
    interpreter.execute();

    return 0;
}
catch (const Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << std::endl
        << std::endl
        << "Stack trace:" << std::endl
        << e.getStackTraceString();
    return 1;
}
@@ -1,140 +0,0 @@
#include <iostream>
#include <iomanip>

#include <IO/WriteBufferFromOStream.h>

#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>

#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>

#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>

#include <Formats/FormatFactory.h>
#include <DataStreams/LimitBlockInputStream.h>
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/copyData.h>

#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Context.h>


int main(int argc, char ** argv)
{
    using namespace DB;

    try
    {
        std::string input = "SELECT x, s1, s2, "
            "/*"
            "2 + x * 2, x * 2, x % 3 == 1, "
            "s1 == 'abc', s1 == s2, s1 != 'abc', s1 != s2, "
            "s1 < 'abc', s1 < s2, s1 > 'abc', s1 > s2, "
            "s1 <= 'abc', s1 <= s2, s1 >= 'abc', s1 >= s2, "
            "*/"
            "s1 < s2 AND x % 3 < x % 5";

        ParserSelectQuery parser;
        ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);

        formatAST(*ast, std::cerr);
        std::cerr << std::endl;

        SharedContextHolder shared_context = Context::createShared();
        Context context = Context::createGlobal(shared_context.get());
        context.makeGlobalContext();
        NamesAndTypesList columns
        {
            {"x", std::make_shared<DataTypeInt16>()},
            {"s1", std::make_shared<DataTypeString>()},
            {"s2", std::make_shared<DataTypeString>()}
        };

        auto syntax_result = SyntaxAnalyzer(context).analyze(ast, columns);
        SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context);
        ExpressionActionsChain chain(context);
        analyzer.appendSelect(chain, false);
        analyzer.appendProjectResult(chain);
        chain.finalize();
        ExpressionActionsPtr expression = chain.getLastActions();

        size_t n = argc == 2 ? std::stol(argv[1]) : 10;

        Block block;

        {
            ColumnWithTypeAndName column;
            column.name = "x";
            column.type = std::make_shared<DataTypeInt16>();
            auto col = ColumnInt16::create();
            auto & vec_x = col->getData();

            vec_x.resize(n);
            for (size_t i = 0; i < n; ++i)
                vec_x[i] = i % 9;

            column.column = std::move(col);
            block.insert(column);
        }

        const char * strings[] = {"abc", "def", "abcd", "defg", "ac"};

        {
            ColumnWithTypeAndName column;
            column.name = "s1";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 5]));

            column.column = std::move(col);
            block.insert(column);
        }

        {
            ColumnWithTypeAndName column;
            column.name = "s2";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 3]));

            column.column = std::move(col);
            block.insert(column);
        }

        {
            Stopwatch stopwatch;
            stopwatch.start();

            expression->execute(block);

            stopwatch.stop();
            std::cout << std::fixed << std::setprecision(2)
                << "Elapsed " << stopwatch.elapsedSeconds() << " sec."
                << ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
                << std::endl;
        }

        auto is = std::make_shared<OneBlockInputStream>(block);
        LimitBlockInputStream lis(is, 20, std::max(0, static_cast<int>(n) - 20));
        WriteBufferFromOStream out_buf(std::cout);
        BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out_buf, block, context);

        copyData(lis, *out);
    }
    catch (const Exception & e)
    {
        std::cerr << e.displayText() << std::endl;
    }

    return 0;
}
@@ -1,61 +0,0 @@
#include <iostream>
#include <iomanip>

#include <common/DateLUT.h>

#include <Poco/ConsoleChannel.h>

#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>

#include <Storages/StorageLog.h>
#include <Storages/System/attachSystemTables.h>

#include <Interpreters/Context.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/executeQuery.h>
#include <Databases/IDatabase.h>
#include <Databases/DatabaseOrdinary.h>


using namespace DB;

int main(int, char **)
try
{
    Poco::AutoPtr<Poco::ConsoleChannel> channel = new Poco::ConsoleChannel(std::cerr);
    Poco::Logger::root().setChannel(channel);
    Poco::Logger::root().setLevel("trace");

    /// Pre-initialize the `DateLUT` so that the first initialization does not affect the measured execution speed.
    DateLUT::instance();

    SharedContextHolder shared_context = Context::createShared();
    Context context = Context::createGlobal(shared_context.get());
    context.makeGlobalContext();

    context.setPath("./");

    loadMetadata(context);

    DatabasePtr system = std::make_shared<DatabaseOrdinary>("system", "./metadata/system/", context);
    DatabaseCatalog::instance().attachDatabase("system", system);
    system->loadStoredObjects(context, false);
    attachSystemTablesLocal(*DatabaseCatalog::instance().getSystemDatabase());
    context.setCurrentDatabase("default");

    ReadBufferFromFileDescriptor in(STDIN_FILENO);
    WriteBufferFromFileDescriptor out(STDOUT_FILENO);

    executeQuery(in, out, /* allow_into_outfile = */ false, context, {});

    return 0;
}
catch (const Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << std::endl
        << std::endl
        << "Stack trace:" << std::endl
        << e.getStackTraceString();
    return 1;
}
@@ -4,11 +4,6 @@
namespace DB
{

namespace ErrorCodes
{
}


bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected)
{
    expected.add(pos, getName());
@@ -4,6 +4,11 @@
namespace DB
{

ConcatProcessor::ConcatProcessor(const Block & header, size_t num_inputs)
    : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin())
{
}

ConcatProcessor::Status ConcatProcessor::prepare()
{
    auto & output = outputs.front();
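The new constructor above wires num_inputs input ports and a single output port, all with the same header, and remembers an iterator to the first input; prepare() then drains the inputs strictly in order. A standalone model of that behaviour (plain queues instead of the real IProcessor port machinery):

#include <deque>
#include <iostream>
#include <vector>

// Standalone model of ConcatProcessor: consume inputs strictly in order,
// advancing to the next input only once the current one is exhausted.
class Concat
{
public:
    explicit Concat(std::vector<std::deque<int>> inputs_) : inputs(std::move(inputs_)) {}

    // Returns false when every input has been drained.
    bool pull(int & value)
    {
        while (current < inputs.size())
        {
            if (!inputs[current].empty())
            {
                value = inputs[current].front();
                inputs[current].pop_front();
                return true;
            }
            ++current; // current input finished, move on (cf. current_input)
        }
        return false;
    }

private:
    std::vector<std::deque<int>> inputs;
    size_t current = 0;
};

int main()
{
    Concat concat({{1, 2}, {3}});
    for (int value; concat.pull(value);)
        std::cout << value << '\n'; // prints 1, 2, 3 in order
}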