diff --git a/CMakeLists.txt b/CMakeLists.txt index 27005bd8d87..4683bf8dec1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -327,20 +327,16 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE include (GNUInstallDirs) include (cmake/contrib_finder.cmake) -include (cmake/lib_name.cmake) find_contrib_lib(double-conversion) # Must be before parquet include (cmake/find/ssl.cmake) include (cmake/find/ldap.cmake) # after ssl include (cmake/find/icu.cmake) -include (cmake/find/boost.cmake) include (cmake/find/zlib.cmake) include (cmake/find/zstd.cmake) include (cmake/find/ltdl.cmake) # for odbc include (cmake/find/termcap.cmake) # openssl, zlib before poco -include (cmake/find/lz4.cmake) -include (cmake/find/xxhash.cmake) include (cmake/find/sparsehash.cmake) include (cmake/find/re2.cmake) include (cmake/find/libgsasl.cmake) @@ -358,7 +354,6 @@ include (cmake/find/hdfs3.cmake) # uses protobuf include (cmake/find/s3.cmake) include (cmake/find/base64.cmake) include (cmake/find/parquet.cmake) -include (cmake/find/hyperscan.cmake) include (cmake/find/simdjson.cmake) include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) @@ -369,7 +364,6 @@ include (cmake/find/cassandra.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) -find_contrib_lib(metrohash) find_contrib_lib(btrie) if (ENABLE_TESTS) diff --git a/base/common/CMakeLists.txt b/base/common/CMakeLists.txt index 9b827cdb468..074f73b158b 100644 --- a/base/common/CMakeLists.txt +++ b/base/common/CMakeLists.txt @@ -16,6 +16,7 @@ set (SRCS shift10.cpp sleep.cpp terminalColors.cpp + errnoToString.cpp ) if (ENABLE_REPLXX) @@ -43,10 +44,6 @@ endif() target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..) -if (NOT USE_INTERNAL_BOOST_LIBRARY) - target_include_directories (common SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) -endif () - # Allow explicit fallback to readline if (NOT ENABLE_REPLXX AND ENABLE_READLINE) message (STATUS "Attempt to fallback to readline explicitly") @@ -72,7 +69,8 @@ endif () target_link_libraries (common PUBLIC ${CITYHASH_LIBRARIES} - ${Boost_SYSTEM_LIBRARY} + boost::headers_only + boost::system FastMemcpy Poco::Net Poco::Net::SSL diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index 141237d5d94..251170ab5c1 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -1,9 +1,11 @@ #include +#include #include #include #include #include +#include namespace { @@ -17,14 +19,41 @@ void trim(String & s) } ReplxxLineReader::ReplxxLineReader( - const Suggest & suggest, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_) - : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)) + const Suggest & suggest, + const String & history_file_path_, + bool multiline_, + Patterns extenders_, + Patterns delimiters_, + replxx::Replxx::highlighter_callback_t highlighter_) + : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) { using namespace std::placeholders; using Replxx = replxx::Replxx; if (!history_file_path.empty()) - rx.history_load(history_file_path); + { + history_file_fd = open(history_file_path.c_str(), O_RDWR); + if (history_file_fd < 0) + { + rx.print("Open of history file failed: %s\n", errnoToString(errno).c_str()); + } + else + { + if (flock(history_file_fd, LOCK_SH)) + { + rx.print("Shared lock of history file failed: %s\n", 
errnoToString(errno).c_str()); + } + else + { + rx.history_load(history_file_path); + + if (flock(history_file_fd, LOCK_UN)) + { + rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str()); + } + } + } + } auto callback = [&suggest] (const String & context, size_t context_size) { @@ -36,6 +65,9 @@ ReplxxLineReader::ReplxxLineReader( rx.set_complete_on_empty(false); rx.set_word_break_characters(word_break_characters); + if (highlighter) + rx.set_highlighter_callback(highlighter); + /// By default C-p/C-n binded to COMPLETE_NEXT/COMPLETE_PREV, /// bind C-p/C-n to history-previous/history-next like readline. rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); @@ -49,8 +81,8 @@ ReplxxLineReader::ReplxxLineReader( ReplxxLineReader::~ReplxxLineReader() { - if (!history_file_path.empty()) - rx.history_save(history_file_path); + if (close(history_file_fd)) + rx.print("Close of history file failed: %s\n", strerror(errno)); } LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt) @@ -68,7 +100,20 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt) void ReplxxLineReader::addToHistory(const String & line) { + // locking history file to prevent from inconsistent concurrent changes + bool locked = false; + if (flock(history_file_fd, LOCK_EX)) + rx.print("Lock of history file failed: %s\n", strerror(errno)); + else + locked = true; + rx.history_add(line); + + // flush changes to the disk + rx.history_save(history_file_path); + + if (locked && 0 != flock(history_file_fd, LOCK_UN)) + rx.print("Unlock of history file failed: %s\n", strerror(errno)); } void ReplxxLineReader::enableBracketedPaste() diff --git a/base/common/ReplxxLineReader.h b/base/common/ReplxxLineReader.h index 472198bcfaf..1fbfd53457b 100644 --- a/base/common/ReplxxLineReader.h +++ b/base/common/ReplxxLineReader.h @@ -4,10 +4,17 @@ #include + class ReplxxLineReader : public LineReader { public: - ReplxxLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_); + ReplxxLineReader( + const Suggest & suggest, + const String & history_file_path, + bool multiline, + Patterns extenders_, + Patterns delimiters_, + replxx::Replxx::highlighter_callback_t highlighter_); ~ReplxxLineReader() override; void enableBracketedPaste() override; @@ -17,4 +24,8 @@ private: void addToHistory(const String & line) override; replxx::Replxx rx; + replxx::Replxx::highlighter_callback_t highlighter; + + // used to call flock() to synchronize multiple clients using same history file + int history_file_fd = -1; }; diff --git a/base/common/errnoToString.cpp b/base/common/errnoToString.cpp new file mode 100644 index 00000000000..cdadba2c615 --- /dev/null +++ b/base/common/errnoToString.cpp @@ -0,0 +1,29 @@ +#include "errnoToString.h" + +#include + + +std::string errnoToString(int code, int the_errno) +{ + const size_t buf_size = 128; + char buf[buf_size]; +#ifndef _GNU_SOURCE + int rc = strerror_r(the_errno, buf, buf_size); +#ifdef __APPLE__ + if (rc != 0 && rc != EINVAL) +#else + if (rc != 0) +#endif + { + std::string tmp = std::to_string(code); + const char * code_str = tmp.c_str(); + const char * unknown_message = "Unknown error "; + strcpy(buf, unknown_message); + strcpy(buf + strlen(unknown_message), code_str); + } + return fmt::format("errno: {}, strerror: {}", the_errno, buf); +#else + (void)code; + return fmt::format("errno: {}, strerror: {}", the_errno, 
strerror_r(the_errno, buf, sizeof(buf))); +#endif +} diff --git a/base/common/errnoToString.h b/base/common/errnoToString.h new file mode 100644 index 00000000000..fd5f81ec2c7 --- /dev/null +++ b/base/common/errnoToString.h @@ -0,0 +1,6 @@ +#pragma once + +#include +#include + +std::string errnoToString(int code, int the_errno = errno); diff --git a/base/common/strong_typedef.h b/base/common/strong_typedef.h index a46eb415e15..d9850a25c37 100644 --- a/base/common/strong_typedef.h +++ b/base/common/strong_typedef.h @@ -1,6 +1,8 @@ #pragma once +#include #include +#include template struct StrongTypedef diff --git a/base/common/ya.make b/base/common/ya.make index 6e45b0193c5..d40b1f5abfd 100644 --- a/base/common/ya.make +++ b/base/common/ya.make @@ -47,6 +47,7 @@ SRCS( shift10.cpp sleep.cpp terminalColors.cpp + errnoToString.cpp ) END() diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt index 702e0197ffb..7d35c1bd31d 100644 --- a/base/mysqlxx/CMakeLists.txt +++ b/base/mysqlxx/CMakeLists.txt @@ -32,10 +32,18 @@ else () endif () endif () -target_link_libraries(mysqlxx PUBLIC common PRIVATE ${MYSQLCLIENT_LIBRARIES} PUBLIC ${Boost_SYSTEM_LIBRARY} PRIVATE ${ZLIB_LIBRARIES}) +target_link_libraries (mysqlxx + PUBLIC + common + PRIVATE + ${MYSQLCLIENT_LIBRARIES} + ${ZLIB_LIBRARIES} +) + if(OPENSSL_LIBRARIES) target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES}) endif() + target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES}) if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR) diff --git a/cmake/Modules/Findmetrohash.cmake b/cmake/Modules/Findmetrohash.cmake deleted file mode 100644 index c51665795bd..00000000000 --- a/cmake/Modules/Findmetrohash.cmake +++ /dev/null @@ -1,44 +0,0 @@ -# - Try to find metrohash headers and libraries. -# -# Usage of this module as follows: -# -# find_package(metrohash) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# METROHASH_ROOT_DIR Set this variable to the root installation of -# metrohash if the module has problems finding -# the proper installation path. -# -# Variables defined by this module: -# -# METROHASH_FOUND System has metrohash libs/headers -# METROHASH_LIBRARIES The metrohash library/libraries -# METROHASH_INCLUDE_DIR The location of metrohash headers - -find_path(METROHASH_ROOT_DIR - NAMES include/metrohash.h -) - -find_library(METROHASH_LIBRARIES - NAMES metrohash - PATHS ${METROHASH_ROOT_DIR}/lib ${METROHASH_LIBRARIES_PATHS} -) - -find_path(METROHASH_INCLUDE_DIR - NAMES metrohash.h - PATHS ${METROHASH_ROOT_DIR}/include PATH_SUFFIXES metrohash ${METROHASH_INCLUDE_PATHS} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(metrohash DEFAULT_MSG - METROHASH_LIBRARIES - METROHASH_INCLUDE_DIR -) - -mark_as_advanced( - METROHASH_ROOT_DIR - METROHASH_LIBRARIES - METROHASH_INCLUDE_DIR -) diff --git a/cmake/find/boost.cmake b/cmake/find/boost.cmake deleted file mode 100644 index ec10a34d839..00000000000 --- a/cmake/find/boost.cmake +++ /dev/null @@ -1,52 +0,0 @@ -option (USE_INTERNAL_BOOST_LIBRARY "Set to FALSE to use system boost library instead of bundled" ${NOT_UNBUNDLED}) - -# Test random file existing in all package variants -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/system/src/error_code.cpp") - if(USE_INTERNAL_BOOST_LIBRARY) - message(WARNING "submodules in contrib/boost is missing. 
to fix try run: \n git submodule update --init --recursive") - endif() - set (USE_INTERNAL_BOOST_LIBRARY 0) - set (MISSING_INTERNAL_BOOST_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_BOOST_LIBRARY) - set (Boost_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) - set (BOOST_ROOT "/usr/local") - find_package (Boost 1.60 COMPONENTS program_options system filesystem thread regex) - # incomplete, no include search, who use it? - if (NOT Boost_FOUND) - # # Try to find manually. - # set (BOOST_PATHS "") - # find_library (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options PATHS ${BOOST_PATHS}) - # find_library (Boost_SYSTEM_LIBRARY boost_system PATHS ${BOOST_PATHS}) - # find_library (Boost_FILESYSTEM_LIBRARY boost_filesystem PATHS ${BOOST_PATHS}) - # maybe found but incorrect version. - set (Boost_INCLUDE_DIRS "") - set (Boost_SYSTEM_LIBRARY "") - endif () -endif () - -if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY) - set (USE_INTERNAL_BOOST_LIBRARY 1) - set (Boost_SYSTEM_LIBRARY boost_system_internal) - set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal) - set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY}) - set (Boost_IOSTREAMS_LIBRARY boost_iostreams_internal) - set (Boost_REGEX_LIBRARY boost_regex_internal) - - set (Boost_INCLUDE_DIRS) - - set (BOOST_ROOT "${ClickHouse_SOURCE_DIR}/contrib/boost") - - # For boost from github: - file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/*/include") - list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_}) - # numeric has additional level - file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/numeric/*/include") - list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_}) - - # For packaged version: - list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost") -endif () - -message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_IOSTREAMS_LIBRARY},${Boost_REGEX_LIBRARY}") diff --git a/cmake/find/hyperscan.cmake b/cmake/find/hyperscan.cmake deleted file mode 100644 index 039981fce81..00000000000 --- a/cmake/find/hyperscan.cmake +++ /dev/null @@ -1,33 +0,0 @@ -if (HAVE_SSSE3) - option (ENABLE_HYPERSCAN "Enable hyperscan" ${ENABLE_LIBRARIES}) -endif () - -if (ENABLE_HYPERSCAN) - -option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED}) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt") - if (USE_INTERNAL_HYPERSCAN_LIBRARY) - message (WARNING "submodule contrib/hyperscan is missing. 
to fix try run: \n git submodule update --init --recursive") - endif () - set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1) - set (USE_INTERNAL_HYPERSCAN_LIBRARY 0) -endif () - -if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY) - find_library (HYPERSCAN_LIBRARY hs) - find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS}) -endif () - -if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR) - set (USE_HYPERSCAN 1) -elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY) - set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src) - set (HYPERSCAN_LIBRARY hs) - set (USE_HYPERSCAN 1) - set (USE_INTERNAL_HYPERSCAN_LIBRARY 1) -endif() - -message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}") - -endif () diff --git a/cmake/find/lz4.cmake b/cmake/find/lz4.cmake deleted file mode 100644 index 5f5e058b53d..00000000000 --- a/cmake/find/lz4.cmake +++ /dev/null @@ -1,23 +0,0 @@ -option (USE_INTERNAL_LZ4_LIBRARY "Set to FALSE to use system lz4 library instead of bundled" ${NOT_UNBUNDLED}) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lz4/lib/lz4.h") - if (USE_INTERNAL_LZ4_LIBRARY) - message (WARNING "submodule contrib/lz4 is missing. to fix try run: \n git submodule update --init --recursive") - set (USE_INTERNAL_LZ4_LIBRARY 0) - endif () - set (MISSING_INTERNAL_LZ4_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_LZ4_LIBRARY) - find_library (LZ4_LIBRARY lz4) - find_path (LZ4_INCLUDE_DIR NAMES lz4.h PATHS ${LZ4_INCLUDE_PATHS}) -endif () - -if (LZ4_LIBRARY AND LZ4_INCLUDE_DIR) -elseif (NOT MISSING_INTERNAL_LZ4_LIBRARY) - set (LZ4_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib) - set (USE_INTERNAL_LZ4_LIBRARY 1) - set (LZ4_LIBRARY lz4) -endif () - -message (STATUS "Using lz4: ${LZ4_INCLUDE_DIR} : ${LZ4_LIBRARY}") diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index 4c91286dae0..d4f62b87d29 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -63,7 +63,7 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD) set(ARROW_LIBRARY arrow_shared) set(PARQUET_LIBRARY parquet_shared) if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) - list(APPEND PARQUET_LIBRARY ${Boost_REGEX_LIBRARY}) + list(APPEND PARQUET_LIBRARY boost::regex) endif() set(THRIFT_LIBRARY thrift) endif() diff --git a/cmake/find/xxhash.cmake b/cmake/find/xxhash.cmake deleted file mode 100644 index 8af871e8fd5..00000000000 --- a/cmake/find/xxhash.cmake +++ /dev/null @@ -1,22 +0,0 @@ -option (USE_INTERNAL_XXHASH_LIBRARY "Set to FALSE to use system xxHash library instead of bundled" ${NOT_UNBUNDLED}) - -if (USE_INTERNAL_XXHASH_LIBRARY AND NOT USE_INTERNAL_LZ4_LIBRARY) - message (WARNING "can not use internal xxhash without internal lz4") - set (USE_INTERNAL_XXHASH_LIBRARY 0) -endif () - -if (USE_INTERNAL_XXHASH_LIBRARY) - set (XXHASH_LIBRARY lz4) - set (XXHASH_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib) -else () - find_library (XXHASH_LIBRARY xxhash) - find_path (XXHASH_INCLUDE_DIR NAMES xxhash.h PATHS ${XXHASH_INCLUDE_PATHS}) -endif () - -if (XXHASH_LIBRARY AND XXHASH_INCLUDE_DIR) - set (USE_XXHASH 1) -else () - set (USE_XXHASH 0) -endif () - -message (STATUS "Using xxhash=${USE_XXHASH}: ${XXHASH_INCLUDE_DIR} : ${XXHASH_LIBRARY}") diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake deleted file mode 100644 index f18b2e52576..00000000000 --- a/cmake/lib_name.cmake +++ /dev/null @@ -1,4 +0,0 @@ -set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide) -set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/src 
${ClickHouse_BINARY_DIR}/src) -set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion) -set(METROHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake index 62ebd434320..cc2098cb397 100644 --- a/cmake/print_include_directories.cmake +++ b/cmake/print_include_directories.cmake @@ -21,11 +21,6 @@ if (TARGET double-conversion) list(APPEND dirs ${dirs1}) endif () -if (TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY}) - get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES) - list(APPEND dirs ${dirs1}) -endif () - list(REMOVE_DUPLICATES dirs) file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "") foreach (dir ${dirs}) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 99b94d04473..b8029124712 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -16,13 +16,18 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) +add_subdirectory (boost-cmake) add_subdirectory (cctz-cmake) add_subdirectory (consistent-hashing-sumbur) add_subdirectory (consistent-hashing) add_subdirectory (croaring) add_subdirectory (FastMemcpy) +add_subdirectory (hyperscan-cmake) add_subdirectory (jemalloc-cmake) add_subdirectory (libcpuid-cmake) +add_subdirectory (libdivide) +add_subdirectory (libmetrohash) +add_subdirectory (lz4-cmake) add_subdirectory (murmurhash) add_subdirectory (replxx-cmake) add_subdirectory (ryu-cmake) @@ -33,14 +38,6 @@ add_subdirectory (poco-cmake) # TODO: refactor the contrib libraries below this comment. -if (USE_INTERNAL_BOOST_LIBRARY) - add_subdirectory (boost-cmake) -endif () - -if (USE_INTERNAL_LZ4_LIBRARY) - add_subdirectory (lz4-cmake) -endif () - if (USE_INTERNAL_ZSTD_LIBRARY) add_subdirectory (zstd-cmake) endif () @@ -63,10 +60,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY) add_subdirectory (libfarmhash) endif () -if (USE_INTERNAL_METROHASH_LIBRARY) - add_subdirectory (libmetrohash) -endif () - if (USE_INTERNAL_BTRIE_LIBRARY) add_subdirectory (libbtrie) endif () @@ -294,18 +287,6 @@ if (USE_BASE64) add_subdirectory (base64-cmake) endif() -if (USE_INTERNAL_HYPERSCAN_LIBRARY) - # The library is large - avoid bloat. 
- if (USE_STATIC_LIBRARIES) - add_subdirectory (hyperscan) - target_compile_options (hs PRIVATE -g0) - else () - set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "") - add_subdirectory (hyperscan) - target_compile_options (hs_shared PRIVATE -g0) - endif () -endif() - if (USE_SIMDJSON) add_subdirectory (simdjson-cmake) endif() diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 46c6b0e3918..afcdae68e77 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -47,7 +47,8 @@ set(thriftcpp_threads_SOURCES ) add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES}) set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641 -target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS}) +target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src) +target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only) # === orc @@ -146,7 +147,7 @@ add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES}) add_dependencies(metadata_fbs flatc) # arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features. -# Apple Clang compiler failed to compile this code without specifying c++11 standard. +# Apple Clang compiler failed to compile this code without specifying c++11 standard. # As result these compiler features detected as absent. In result it failed to compile orc itself. # In orc makefile there is code that sets flags, but arrow-cmake ignores these flags. if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") @@ -286,10 +287,6 @@ set(ARROW_SRCS ${ARROW_SRCS} ${LIBRARY_DIR}/compute/kernels/util_internal.cc ) -if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY) - set(ARROW_WITH_LZ4 1) -endif () - if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) set(ARROW_WITH_SNAPPY 1) endif () @@ -302,10 +299,8 @@ if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) set(ARROW_WITH_ZSTD 1) endif () -if (ARROW_WITH_LZ4) - add_definitions(-DARROW_WITH_LZ4) - SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS}) -endif () +add_definitions(-DARROW_WITH_LZ4) +SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS}) if (ARROW_WITH_SNAPPY) add_definitions(-DARROW_WITH_SNAPPY) @@ -328,18 +323,15 @@ add_library(${ARROW_LIBRARY} ${ARROW_SRCS}) # Arrow dependencies add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs) -target_link_libraries(${ARROW_LIBRARY} PRIVATE boost_system_internal boost_filesystem_internal boost_regex_internal) -target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY}) +target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem) if (USE_INTERNAL_PROTOBUF_LIBRARY) add_dependencies(${ARROW_LIBRARY} protoc) endif () -target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS}) +target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY}) -if (ARROW_WITH_LZ4) - target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY}) -endif () +target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4) if (ARROW_WITH_SNAPPY) target_link_libraries(${ARROW_LIBRARY} PRIVATE 
${SNAPPY_LIBRARY}) endif () @@ -396,8 +388,7 @@ list(APPEND PARQUET_SRCS add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS}) target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src) include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h -target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY}) -target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS}) +target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex) if (SANITIZE STREQUAL "undefined") target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined) diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index f544b3c50cd..052a19ee804 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -45,13 +45,12 @@ set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_V target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR}) -target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) -target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY}) +target_link_libraries (avrocpp PRIVATE boost::headers_only boost::iostreams) if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE) target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) - target_link_libraries (avrocpp ${SNAPPY_LIBRARY}) + target_link_libraries (avrocpp PRIVATE ${SNAPPY_LIBRARY}) endif () if (COMPILER_GCC) @@ -67,4 +66,4 @@ ADD_CUSTOM_TARGET(avro_symlink_headers ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro ) -add_dependencies(avrocpp avro_symlink_headers) \ No newline at end of file +add_dependencies(avrocpp avro_symlink_headers) diff --git a/contrib/aws b/contrib/aws index fb5c604525f..f7d9ce39f41 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit fb5c604525f5151d75a856462653e7e38b559b79 +Subproject commit f7d9ce39f41323300044567be007c233338bb94a diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 582cc84a552..fb7b236d30d 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -1,45 +1,101 @@ -# Supported contrib/boost source variants: -# 1. Default - Minimized vrsion from release archive : https://github.com/ClickHouse-Extras/boost -# 2. Release archive unpacked to contrib/boost -# 3. 
Full boost https://github.com/boostorg/boost +option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ${NOT_UNBUNDLED}) -# if boostorg/boost connected as submodule: Update all boost internal submodules to tag: -# git submodule foreach "git fetch --all && git checkout boost-1.66.0 || true" +if (USE_INTERNAL_BOOST_LIBRARY) + set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost) -# -# Important boost patch: 094c18b -# + # filesystem -include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) + set (SRCS_FILESYSTEM + ${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp + ${LIBRARY_DIR}/libs/filesystem/src/operations.cpp + ${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp + ${LIBRARY_DIR}/libs/filesystem/src/path.cpp + ${LIBRARY_DIR}/libs/filesystem/src/portability.cpp + ${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp + ${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp + ${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp + ) -set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost) + add_library (_boost_filesystem ${SRCS_FILESYSTEM}) + add_library (boost::filesystem ALIAS _boost_filesystem) + target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) -if(NOT MSVC) - add_definitions(-Wno-unused-variable -Wno-deprecated-declarations) -endif() + # headers-only -macro(add_boost_lib lib_name) - add_headers_and_sources(boost_${lib_name} ${LIBRARY_DIR}/libs/${lib_name}/src) - add_library(boost_${lib_name}_internal ${boost_${lib_name}_sources}) - target_include_directories(boost_${lib_name}_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) - target_compile_definitions(boost_${lib_name}_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED) -endmacro() + add_library (_boost_headers_only INTERFACE) + add_library (boost::headers_only ALIAS _boost_headers_only) + target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) -add_boost_lib(system) + # iostreams -add_boost_lib(program_options) + set (SRCS_IOSTREAMS + ${LIBRARY_DIR}/libs/iostreams/src/file_descriptor.cpp + ${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp + ${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp + ${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp + ) -add_boost_lib(filesystem) -target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal) + add_library (_boost_iostreams ${SRCS_IOSTREAMS}) + add_library (boost::iostreams ALIAS _boost_iostreams) + target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR}) + target_link_libraries (_boost_iostreams PRIVATE zlib) -#add_boost_lib(random) + # program_options -if (USE_INTERNAL_PARQUET_LIBRARY) - add_boost_lib(regex) -endif() + set (SRCS_PROGRAM_OPTIONS + ${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp + ${LIBRARY_DIR}/libs/program_options/src/config_file.cpp + ${LIBRARY_DIR}/libs/program_options/src/convert.cpp + ${LIBRARY_DIR}/libs/program_options/src/options_description.cpp + ${LIBRARY_DIR}/libs/program_options/src/parsers.cpp + ${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp + ${LIBRARY_DIR}/libs/program_options/src/split.cpp + ${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp + ${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp + ${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp + ${LIBRARY_DIR}/libs/program_options/src/winmain.cpp + ) -if (USE_INTERNAL_AVRO_LIBRARY) - add_boost_lib(iostreams) - target_link_libraries(boost_iostreams_internal PUBLIC ${ZLIB_LIBRARIES}) - target_include_directories(boost_iostreams_internal SYSTEM BEFORE 
PRIVATE ${ZLIB_INCLUDE_DIR}) -endif() + add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS}) + add_library (boost::program_options ALIAS _boost_program_options) + target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) + + # regex + + set (SRCS_REGEX + ${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/cregex.cpp + ${LIBRARY_DIR}/libs/regex/src/fileiter.cpp + ${LIBRARY_DIR}/libs/regex/src/icu.cpp + ${LIBRARY_DIR}/libs/regex/src/instances.cpp + ${LIBRARY_DIR}/libs/regex/src/internals.hpp + ${LIBRARY_DIR}/libs/regex/src/posix_api.cpp + ${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp + ${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp + ${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp + ${LIBRARY_DIR}/libs/regex/src/regex.cpp + ${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp + ${LIBRARY_DIR}/libs/regex/src/usinstances.cpp + ${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp + ${LIBRARY_DIR}/libs/regex/src/winstances.cpp + ) + + add_library (_boost_regex ${SRCS_REGEX}) + add_library (boost::regex ALIAS _boost_regex) + target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR}) + + # system + + set (SRCS_SYSTEM + ${LIBRARY_DIR}/libs/system/src/error_code.cpp + ) + + add_library (_boost_system ${SRCS_SYSTEM}) + add_library (boost::system ALIAS _boost_system) + target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) +else () + message (FATAL_ERROR "TODO: external Boost library is not supported!") +endif () diff --git a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt index 2725eaf7a77..9f512974948 100644 --- a/contrib/cppkafka-cmake/CMakeLists.txt +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -1,31 +1,33 @@ -set(CPPKAFKA_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka) +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka) set(SRCS - ${CPPKAFKA_DIR}/src/configuration.cpp - ${CPPKAFKA_DIR}/src/topic_configuration.cpp - ${CPPKAFKA_DIR}/src/configuration_option.cpp - ${CPPKAFKA_DIR}/src/exceptions.cpp - ${CPPKAFKA_DIR}/src/topic.cpp - ${CPPKAFKA_DIR}/src/buffer.cpp - ${CPPKAFKA_DIR}/src/queue.cpp - ${CPPKAFKA_DIR}/src/message.cpp - ${CPPKAFKA_DIR}/src/message_timestamp.cpp - ${CPPKAFKA_DIR}/src/message_internal.cpp - ${CPPKAFKA_DIR}/src/topic_partition.cpp - ${CPPKAFKA_DIR}/src/topic_partition_list.cpp - ${CPPKAFKA_DIR}/src/metadata.cpp - ${CPPKAFKA_DIR}/src/group_information.cpp - ${CPPKAFKA_DIR}/src/error.cpp - ${CPPKAFKA_DIR}/src/event.cpp - - ${CPPKAFKA_DIR}/src/kafka_handle_base.cpp - ${CPPKAFKA_DIR}/src/producer.cpp - ${CPPKAFKA_DIR}/src/consumer.cpp + ${LIBRARY_DIR}/src/buffer.cpp + ${LIBRARY_DIR}/src/configuration_option.cpp + ${LIBRARY_DIR}/src/configuration.cpp + ${LIBRARY_DIR}/src/consumer.cpp + ${LIBRARY_DIR}/src/error.cpp + ${LIBRARY_DIR}/src/event.cpp + ${LIBRARY_DIR}/src/exceptions.cpp + ${LIBRARY_DIR}/src/group_information.cpp + ${LIBRARY_DIR}/src/kafka_handle_base.cpp + ${LIBRARY_DIR}/src/message_internal.cpp + ${LIBRARY_DIR}/src/message_timestamp.cpp + ${LIBRARY_DIR}/src/message.cpp + ${LIBRARY_DIR}/src/metadata.cpp + ${LIBRARY_DIR}/src/producer.cpp + ${LIBRARY_DIR}/src/queue.cpp + ${LIBRARY_DIR}/src/topic_configuration.cpp + ${LIBRARY_DIR}/src/topic_partition_list.cpp + ${LIBRARY_DIR}/src/topic_partition.cpp + ${LIBRARY_DIR}/src/topic.cpp ) add_library(cppkafka ${SRCS}) -target_link_libraries(cppkafka 
PRIVATE ${RDKAFKA_LIBRARY}) -target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka) -target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS}) -target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include) +target_link_libraries(cppkafka + PRIVATE + ${RDKAFKA_LIBRARY} + boost::headers_only +) +target_include_directories(cppkafka PRIVATE ${LIBRARY_DIR}/include/cppkafka) +target_include_directories(cppkafka SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt new file mode 100644 index 00000000000..bed774afdbf --- /dev/null +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -0,0 +1,252 @@ +option (ENABLE_HYPERSCAN "Enable hyperscan library" ${ENABLE_LIBRARIES}) + +if (NOT HAVE_SSSE3) + set (ENABLE_HYPERSCAN OFF) +endif () + +if (ENABLE_HYPERSCAN) + option (USE_INTERNAL_HYPERSCAN_LIBRARY "Use internal hyperscan library" ${NOT_UNBUNDLED}) + + if (USE_INTERNAL_HYPERSCAN_LIBRARY) + set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan) + + set (SRCS + ${LIBRARY_DIR}/src/alloc.c + ${LIBRARY_DIR}/src/compiler/asserts.cpp + ${LIBRARY_DIR}/src/compiler/compiler.cpp + ${LIBRARY_DIR}/src/compiler/error.cpp + ${LIBRARY_DIR}/src/crc32.c + ${LIBRARY_DIR}/src/database.c + ${LIBRARY_DIR}/src/fdr/engine_description.cpp + ${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp + ${LIBRARY_DIR}/src/fdr/fdr_compile.cpp + ${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp + ${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp + ${LIBRARY_DIR}/src/fdr/fdr.c + ${LIBRARY_DIR}/src/fdr/flood_compile.cpp + ${LIBRARY_DIR}/src/fdr/teddy_compile.cpp + ${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp + ${LIBRARY_DIR}/src/fdr/teddy.c + ${LIBRARY_DIR}/src/grey.cpp + ${LIBRARY_DIR}/src/hs_valid_platform.c + ${LIBRARY_DIR}/src/hs_version.c + ${LIBRARY_DIR}/src/hs.cpp + ${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp + ${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp + ${LIBRARY_DIR}/src/hwlm/hwlm.c + ${LIBRARY_DIR}/src/hwlm/noodle_build.cpp + ${LIBRARY_DIR}/src/hwlm/noodle_engine.c + ${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp + ${LIBRARY_DIR}/src/nfa/accel.c + ${LIBRARY_DIR}/src/nfa/accelcompile.cpp + ${LIBRARY_DIR}/src/nfa/castle.c + ${LIBRARY_DIR}/src/nfa/castlecompile.cpp + ${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp + ${LIBRARY_DIR}/src/nfa/dfa_min.cpp + ${LIBRARY_DIR}/src/nfa/gough.c + ${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp + ${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp + ${LIBRARY_DIR}/src/nfa/goughcompile.cpp + ${LIBRARY_DIR}/src/nfa/lbr.c + ${LIBRARY_DIR}/src/nfa/limex_64.c + ${LIBRARY_DIR}/src/nfa/limex_accel.c + ${LIBRARY_DIR}/src/nfa/limex_compile.cpp + ${LIBRARY_DIR}/src/nfa/limex_native.c + ${LIBRARY_DIR}/src/nfa/limex_simd128.c + ${LIBRARY_DIR}/src/nfa/limex_simd256.c + ${LIBRARY_DIR}/src/nfa/limex_simd384.c + ${LIBRARY_DIR}/src/nfa/limex_simd512.c + ${LIBRARY_DIR}/src/nfa/mcclellan.c + ${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp + ${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp + ${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp + ${LIBRARY_DIR}/src/nfa/mcsheng_data.c + ${LIBRARY_DIR}/src/nfa/mcsheng.c + ${LIBRARY_DIR}/src/nfa/mpv.c + ${LIBRARY_DIR}/src/nfa/mpvcompile.cpp + ${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c + ${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp + ${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp + ${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp + ${LIBRARY_DIR}/src/nfa/rdfa.cpp + ${LIBRARY_DIR}/src/nfa/repeat.c + ${LIBRARY_DIR}/src/nfa/repeatcompile.cpp + ${LIBRARY_DIR}/src/nfa/sheng.c + 
${LIBRARY_DIR}/src/nfa/shengcompile.cpp + ${LIBRARY_DIR}/src/nfa/shufti.c + ${LIBRARY_DIR}/src/nfa/shufticompile.cpp + ${LIBRARY_DIR}/src/nfa/tamarama.c + ${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp + ${LIBRARY_DIR}/src/nfa/truffle.c + ${LIBRARY_DIR}/src/nfa/trufflecompile.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_builder.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_calc_components.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_cyclic_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_depth.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_dominators.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_edge_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_equivalence.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_execute.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_expr_info.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_extparam.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_fixed_width.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_fuzzy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_haig.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_mcclellan.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_misc_opt.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_netflow.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_prefilter.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_region.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_som.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_split.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_uncalc_components.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_utf8.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_util.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_width.cpp + ${LIBRARY_DIR}/src/nfagraph/ng.cpp + ${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp + ${LIBRARY_DIR}/src/parser/buildstate.cpp + ${LIBRARY_DIR}/src/parser/check_refs.cpp + ${LIBRARY_DIR}/src/parser/Component.cpp + ${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp + ${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp + ${LIBRARY_DIR}/src/parser/ComponentAtomicGroup.cpp + ${LIBRARY_DIR}/src/parser/ComponentBackReference.cpp + ${LIBRARY_DIR}/src/parser/ComponentBoundary.cpp + ${LIBRARY_DIR}/src/parser/ComponentByte.cpp + ${LIBRARY_DIR}/src/parser/ComponentClass.cpp + ${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp + ${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp + ${LIBRARY_DIR}/src/parser/ComponentEUS.cpp + ${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp + ${LIBRARY_DIR}/src/parser/ComponentSequence.cpp + ${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp + 
${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp + ${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp + ${LIBRARY_DIR}/src/parser/control_verbs.cpp + ${LIBRARY_DIR}/src/parser/logical_combination.cpp + ${LIBRARY_DIR}/src/parser/parse_error.cpp + ${LIBRARY_DIR}/src/parser/parser_util.cpp + ${LIBRARY_DIR}/src/parser/Parser.cpp + ${LIBRARY_DIR}/src/parser/prefilter.cpp + ${LIBRARY_DIR}/src/parser/shortcut_literal.cpp + ${LIBRARY_DIR}/src/parser/ucp_table.cpp + ${LIBRARY_DIR}/src/parser/unsupported.cpp + ${LIBRARY_DIR}/src/parser/utf8_validate.cpp + ${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp + ${LIBRARY_DIR}/src/rose/block.c + ${LIBRARY_DIR}/src/rose/catchup.c + ${LIBRARY_DIR}/src/rose/init.c + ${LIBRARY_DIR}/src/rose/match.c + ${LIBRARY_DIR}/src/rose/program_runtime.c + ${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp + ${LIBRARY_DIR}/src/rose/rose_build_add.cpp + ${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp + ${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp + ${LIBRARY_DIR}/src/rose/rose_build_castle.cpp + ${LIBRARY_DIR}/src/rose/rose_build_compile.cpp + ${LIBRARY_DIR}/src/rose/rose_build_convert.cpp + ${LIBRARY_DIR}/src/rose/rose_build_dedupe.cpp + ${LIBRARY_DIR}/src/rose/rose_build_engine_blob.cpp + ${LIBRARY_DIR}/src/rose/rose_build_exclusive.cpp + ${LIBRARY_DIR}/src/rose/rose_build_groups.cpp + ${LIBRARY_DIR}/src/rose/rose_build_infix.cpp + ${LIBRARY_DIR}/src/rose/rose_build_instructions.cpp + ${LIBRARY_DIR}/src/rose/rose_build_lit_accel.cpp + ${LIBRARY_DIR}/src/rose/rose_build_long_lit.cpp + ${LIBRARY_DIR}/src/rose/rose_build_lookaround.cpp + ${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp + ${LIBRARY_DIR}/src/rose/rose_build_merge.cpp + ${LIBRARY_DIR}/src/rose/rose_build_misc.cpp + ${LIBRARY_DIR}/src/rose/rose_build_program.cpp + ${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp + ${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp + ${LIBRARY_DIR}/src/rose/rose_build_width.cpp + ${LIBRARY_DIR}/src/rose/rose_in_util.cpp + ${LIBRARY_DIR}/src/rose/stream.c + ${LIBRARY_DIR}/src/runtime.c + ${LIBRARY_DIR}/src/scratch.c + ${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp + ${LIBRARY_DIR}/src/som/slot_manager.cpp + ${LIBRARY_DIR}/src/som/som_runtime.c + ${LIBRARY_DIR}/src/som/som_stream.c + ${LIBRARY_DIR}/src/stream_compress.c + ${LIBRARY_DIR}/src/util/alloc.cpp + ${LIBRARY_DIR}/src/util/charreach.cpp + ${LIBRARY_DIR}/src/util/clique.cpp + ${LIBRARY_DIR}/src/util/compile_context.cpp + ${LIBRARY_DIR}/src/util/compile_error.cpp + ${LIBRARY_DIR}/src/util/cpuid_flags.c + ${LIBRARY_DIR}/src/util/depth.cpp + ${LIBRARY_DIR}/src/util/fatbit_build.cpp + ${LIBRARY_DIR}/src/util/multibit_build.cpp + ${LIBRARY_DIR}/src/util/multibit.c + ${LIBRARY_DIR}/src/util/report_manager.cpp + ${LIBRARY_DIR}/src/util/simd_utils.c + ${LIBRARY_DIR}/src/util/state_compress.c + ${LIBRARY_DIR}/src/util/target_info.cpp + ${LIBRARY_DIR}/src/util/ue2string.cpp + ) + + add_library (hyperscan ${SRCS}) + + target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) + target_compile_options (hyperscan + PRIVATE -g0 -march=corei7 # library has too much debug information + PUBLIC -Wno-documentation + ) + target_include_directories (hyperscan + PRIVATE + common + ${LIBRARY_DIR}/include + PUBLIC + ${LIBRARY_DIR}/src + ) + if (ARCH_AMD64) + target_include_directories (hyperscan PRIVATE x86_64) + endif () + target_link_libraries (hyperscan PRIVATE boost::headers_only) + else () + find_library (LIBRARY_HYPERSCAN hs) + find_path (INCLUDE_HYPERSCAN NAMES hs.h HINTS /usr/include/hs) # Ubuntu puts headers in this folder + + 
add_library (hyperscan UNKNOWN IMPORTED GLOBAL) + set_target_properties (hyperscan PROPERTIES IMPORTED_LOCATION ${LIBRARY_HYPERSCAN}) + set_target_properties (hyperscan PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_HYPERSCAN}) + set_property(TARGET hyperscan APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_HYPERSCAN=1) + endif () + + message (STATUS "Using hyperscan") +else () + add_library (hyperscan INTERFACE) + target_compile_definitions (hyperscan INTERFACE USE_HYPERSCAN=0) + + message (STATUS "Not using hyperscan") +endif () diff --git a/contrib/hyperscan-cmake/common/hs_version.h b/contrib/hyperscan-cmake/common/hs_version.h new file mode 100644 index 00000000000..f6fa8cb209f --- /dev/null +++ b/contrib/hyperscan-cmake/common/hs_version.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HS_VERSION_H_C6428FAF8E3713 +#define HS_VERSION_H_C6428FAF8E3713 + +/** + * A version string to identify this release of Hyperscan. + */ +#define HS_VERSION_STRING "5.1.1 2000-01-01" + +#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (1 << 8) | 0) + +#endif /* HS_VERSION_H_C6428FAF8E3713 */ + diff --git a/contrib/hyperscan-cmake/x86_64/config.h b/contrib/hyperscan-cmake/x86_64/config.h new file mode 100644 index 00000000000..4786e3f4e21 --- /dev/null +++ b/contrib/hyperscan-cmake/x86_64/config.h @@ -0,0 +1,106 @@ +/* used by cmake */ + +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* "Define if the build is 32 bit" */ +/* #undef ARCH_32_BIT */ + +/* "Define if the build is 64 bit" */ +#define ARCH_64_BIT + +/* "Define if building for IA32" */ +/* #undef ARCH_IA32 */ + +/* "Define if building for EM64T" */ +#define ARCH_X86_64 + +/* internal build, switch on dump support. */ +/* #undef DUMP_SUPPORT */ + +/* Define if building "fat" runtime. */ +/* #undef FAT_RUNTIME */ + +/* Define if building AVX-512 in the fat runtime. */ +/* #undef BUILD_AVX512 */ + +/* Define to 1 if `backtrace' works. 
*/ +#define HAVE_BACKTRACE + +/* C compiler has __builtin_assume_aligned */ +#define HAVE_CC_BUILTIN_ASSUME_ALIGNED + +/* C++ compiler has __builtin_assume_aligned */ +#define HAVE_CXX_BUILTIN_ASSUME_ALIGNED + +/* C++ compiler has x86intrin.h */ +#define HAVE_CXX_X86INTRIN_H + +/* C compiler has x86intrin.h */ +#define HAVE_C_X86INTRIN_H + +/* C++ compiler has intrin.h */ +/* #undef HAVE_CXX_INTRIN_H */ + +/* C compiler has intrin.h */ +/* #undef HAVE_C_INTRIN_H */ + +/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to + 0 if you don't. */ +/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */ + +/* #undef HAVE_PTHREAD_NP_H */ + +/* Define to 1 if you have the `malloc_info' function. */ +/* #undef HAVE_MALLOC_INFO */ + +/* Define to 1 if you have the `memmem' function. */ +/* #undef HAVE_MEMMEM */ + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP + +/* Define to 1 if `posix_memalign' works. */ +#define HAVE_POSIX_MEMALIGN + +/* Define to 1 if you have the `setrlimit' function. */ +#define HAVE_SETRLIMIT + +/* Define to 1 if you have the `shmget' function. */ +/* #undef HAVE_SHMGET */ + +/* Define to 1 if you have the `sigaction' function. */ +#define HAVE_SIGACTION + +/* Define to 1 if you have the `sigaltstack' function. */ +#define HAVE_SIGALTSTACK + +/* Define if the sqlite3_open_v2 call is available */ +/* #undef HAVE_SQLITE3_OPEN_V2 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H + +/* Define to 1 if you have the `_aligned_malloc' function. */ +/* #undef HAVE__ALIGNED_MALLOC */ + +/* Define if compiler has __builtin_constant_p */ +#define HAVE__BUILTIN_CONSTANT_P + +/* Optimize, inline critical functions */ +#define HS_OPTIMIZE + +#define HS_VERSION +#define HS_MAJOR_VERSION +#define HS_MINOR_VERSION +#define HS_PATCH_VERSION + +#define BUILD_DATE + +/* define if this is a release build. */ +#define RELEASE_BUILD + +/* define if reverse_graph requires patch for boost 1.62.0 */ +/* #undef BOOST_REVGRAPH_PATCH */ + +#endif /* CONFIG_H_ */ diff --git a/contrib/libdivide/CMakeLists.txt b/contrib/libdivide/CMakeLists.txt new file mode 100644 index 00000000000..57e9f254db5 --- /dev/null +++ b/contrib/libdivide/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library (libdivide INTERFACE) +target_include_directories (libdivide SYSTEM BEFORE INTERFACE .) diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index e68f0bacf46..4c71770f5b6 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -209,9 +209,8 @@ endif() target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARY}) # inherit from parent cmake -target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR}) -target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY}) +target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY} boost::headers_only) if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES) target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR}) target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES}) diff --git a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt index d71a5432715..9304cb3644c 100644 --- a/contrib/libmetrohash/CMakeLists.txt +++ b/contrib/libmetrohash/CMakeLists.txt @@ -1,13 +1,10 @@ -if (HAVE_SSE42) # Not used. Pretty easy to port. 
- set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp src/metrohash128crc.h) -endif () - -add_library(metrohash - src/metrohash.h - src/testvector.h - +set (SRCS src/metrohash64.cpp src/metrohash128.cpp - ${SOURCES_SSE42_ONLY}) +) +if (HAVE_SSE42) # Not used. Pretty easy to port. + list (APPEND SRCS src/metrohash128crc.cpp) +endif () -target_include_directories(metrohash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) +add_library(metrohash ${SRCS}) +target_include_directories(metrohash PUBLIC src) diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 93ef9d2357b..b8dcb0a9340 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -82,7 +82,7 @@ target_compile_options(rdkafka PRIVATE -fno-sanitize=undefined) target_include_directories(rdkafka SYSTEM PUBLIC include) target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used. target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used. -target_link_libraries(rdkafka PRIVATE ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${LIBGSASL_LIBRARY}) +target_link_libraries(rdkafka PRIVATE lz4 ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LIBGSASL_LIBRARY}) if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY) target_link_libraries(rdkafka PRIVATE ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) endif() diff --git a/contrib/lz4-cmake/CMakeLists.txt b/contrib/lz4-cmake/CMakeLists.txt index 856389395ca..b8121976213 100644 --- a/contrib/lz4-cmake/CMakeLists.txt +++ b/contrib/lz4-cmake/CMakeLists.txt @@ -1,17 +1,28 @@ -SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib) +option (USE_INTERNAL_LZ4_LIBRARY "Use internal lz4 library" ${NOT_UNBUNDLED}) -add_library (lz4 - ${LIBRARY_DIR}/lz4.c - ${LIBRARY_DIR}/lz4hc.c - ${LIBRARY_DIR}/lz4frame.c - ${LIBRARY_DIR}/lz4frame.h - ${LIBRARY_DIR}/xxhash.c - ${LIBRARY_DIR}/xxhash.h +if (USE_INTERNAL_LZ4_LIBRARY) + set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4) - ${LIBRARY_DIR}/lz4.h - ${LIBRARY_DIR}/lz4hc.h) + set (SRCS + ${LIBRARY_DIR}/lib/lz4.c + ${LIBRARY_DIR}/lib/lz4hc.c + ${LIBRARY_DIR}/lib/lz4frame.c + ${LIBRARY_DIR}/lib/xxhash.c + ) -target_compile_definitions(lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1) -target_compile_options(lz4 PRIVATE -fno-sanitize=undefined) + add_library (lz4 ${SRCS}) -target_include_directories(lz4 PUBLIC ${LIBRARY_DIR}) + target_compile_definitions (lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1 USE_XXHASH=1) + if (SANITIZE STREQUAL "undefined") + target_compile_options (lz4 PRIVATE -fno-sanitize=undefined) + endif () + target_include_directories(lz4 PUBLIC ${LIBRARY_DIR}/lib) +else () + find_library (LIBRARY_LZ4 lz4) + find_path (INCLUDE_LZ4 lz4.h) + + add_library (lz4 UNKNOWN IMPORTED) + set_property (TARGET lz4 PROPERTY IMPORTED_LOCATION ${LIBRARY_LZ4}) + set_property (TARGET lz4 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_LZ4}) + set_property (TARGET lz4 APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_XXHASH=0) +endif () diff --git a/contrib/replxx b/contrib/replxx index f1332626639..2d37daaad24 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit f1332626639d6492eaf170758642da14fbbda7bf +Subproject commit 2d37daaad24be71e76514a36b0a47120be2f9086 diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 0d1bdc2a88a..4b566ef2158 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ 
-18,8 +18,7 @@ ccache --zero-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: rm -f CMakeCache.txt cmake .. -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS -ninja -ccache --show-stats ||: +ninja clickhouse-bundle mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output find . -name '*.so' -print -exec mv '{}' /output \; @@ -47,3 +46,4 @@ then rm -r /output/* mv "$COMBINED_OUTPUT.tgz" /output fi +ccache --show-stats ||: diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 786e6620eac..b2e4f76c00c 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -82,8 +82,8 @@ RUN apt-get --allow-unauthenticated update -y \ libcctz-dev \ libldap2-dev \ libsasl2-dev \ - heimdal-multidev - + heimdal-multidev \ + libhyperscan-dev # This symlink required by gcc to find lld compiler diff --git a/docker/packager/packager b/docker/packager/packager index 8a5bdda60e8..85dd3cc421c 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -120,6 +120,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ result.append("CCACHE_BASEDIR=/build") result.append("CCACHE_NOHASHDIR=true") result.append("CCACHE_COMPILERCHECK=content") + result.append("CCACHE_MAXSIZE=15G") # result.append("CCACHE_UMASK=777") if distcc_hosts: @@ -141,7 +142,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if unbundled: # TODO: fix build with ENABLE_RDKAFKA - cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') + cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_BOOST_LIBRARY=1') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') diff --git a/docker/test/integration/compose/docker_compose_minio.yml b/docker/test/integration/compose/docker_compose_minio.yml index c52c45b9d69..eefbe4abff5 100644 --- a/docker/test/integration/compose/docker_compose_minio.yml +++ b/docker/test/integration/compose/docker_compose_minio.yml @@ -43,7 +43,10 @@ services: # Empty container to run proxy resolver. resolver: - image: python:3 + build: + context: ../../../docker/test/integration/ + dockerfile: resolver/Dockerfile + network: host ports: - "4083:8080" tty: true diff --git a/docker/test/integration/resolver/Dockerfile b/docker/test/integration/resolver/Dockerfile new file mode 100644 index 00000000000..37118b7a555 --- /dev/null +++ b/docker/test/integration/resolver/Dockerfile @@ -0,0 +1,4 @@ +# Helper docker container to run python bottle apps + +FROM python:3 +RUN python -m pip install bottle \ No newline at end of file diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 209b36f59af..e63ba6122c8 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -104,13 +104,12 @@ function run_tests # allows the tests to pass even when we add new functions and tests for # them, that are not supported in the old revision. test_prefix=left/performance - elif [ "$PR_TO_TEST" != "" ] && [ "$PR_TO_TEST" != "0" ] - then + else # For PRs, use newer test files so we can test these changes. test_prefix=right/performance - # If some tests were changed in the PR, we may want to run only these - # ones. 
The list of changed tests in changed-test.txt is prepared in + # If only the perf tests were changed in the PR, we will run only these + # tests. The list of changed tests in changed-test.txt is prepared in # entrypoint.sh from git diffs, because it has the cloned repo. Used # to use rsync for that but it was really ugly and not always correct # (e.g. when the reference SHA is really old and has some other diff --git a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml index e41ab8eb75d..5dcc3c51eca 100644 --- a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml @@ -19,6 +19,5 @@ 1000 - 0 1000000000 diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index ce1416ac9dc..6e3e3df5d39 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -5,6 +5,7 @@ 0 1 1 + 1 diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index ef62c8981e9..5afaf725c50 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -83,10 +83,17 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi if [ "$PR_TO_TEST" != "0" ] then - # Prepare the list of tests changed in the PR for use by compare.sh. Compare to - # merge base, because master might be far in the future and have unrelated test - # changes. - git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST" master)" -- tests/performance | tee changed-tests.txt + # If the PR only changes the tests and nothing else, prepare a list of these + # tests for use by compare.sh. Compare to merge base, because master might be + # far in the future and have unrelated test changes. + base=$(git -C ch merge-base "$SHA_TO_TEST" master) + git -C ch diff --name-only "$SHA_TO_TEST" "$base" | tee changed-tests.txt + if grep -vq '^tests/performance' changed-tests.txt + then + # Have some other changes besides the tests, so truncate the test list, + # meaning, run all tests. + : > changed-tests.txt + fi fi # Set python output encoding so that we can print queries with Russian letters. @@ -124,5 +131,5 @@ done dmesg -T > dmesg.log -7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze +7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze cp compare.log /output diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index ac506d046b1..308d4760b48 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -100,11 +100,20 @@ for c in connections: report_stage_end('drop1') -# Apply settings +# Apply settings. +# If there are errors, report them and continue -- maybe a new test uses a setting +# that is not in master, but the queries can still run. 
If we have multiple +# settings and one of them throws an exception, all previous settings for this +# connection will be reset, because the driver reconnects on error (not +# configurable). So the end result is uncertain, but hopefully we'll be able to +# run at least some queries. settings = root.findall('settings/*') for c in connections: for s in settings: - c.execute("set {} = '{}'".format(s.tag, s.text)) + try: + c.execute("set {} = '{}'".format(s.tag, s.text)) + except: + print(traceback.format_exc(), file=sys.stderr) report_stage_end('settings') diff --git a/docs/en/development/build.md b/docs/en/development/build.md index b9b22c737ac..842e565b132 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -5,9 +5,13 @@ toc_title: How to Build ClickHouse on Linux # How to Build ClickHouse for Development {#how-to-build-clickhouse-for-development} -The following tutorial is based on the Ubuntu Linux system. -With appropriate changes, it should also work on any other Linux distribution. -Supported platforms: x86\_64 and AArch64. Support for Power9 is experimental. +The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution. + +Supported platforms: + +- x86\_64 +- AArch64 +- Power9 (experimental) ## Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja} @@ -21,8 +25,18 @@ Or cmake3 instead of cmake on older systems. There are several ways to do this. +### Install from Repository {#install-from-repository} + +On Ubuntu 19.10 or newer: +``` +$ sudo apt-get update +$ sudo apt-get install gcc-9 g++-9 +``` + ### Install from a PPA Package {#install-from-a-ppa-package} +On older Ubuntu: + ``` bash $ sudo apt-get install software-properties-common $ sudo apt-add-repository ppa:ubuntu-toolchain-r/test @@ -32,7 +46,7 @@ $ sudo apt-get install gcc-9 g++-9 ### Install from Sources {#install-from-sources} -Look at [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh) +See [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh) ## Use GCC 9 for Builds {#use-gcc-9-for-builds} @@ -61,7 +75,6 @@ $ mkdir build $ cd build $ cmake .. $ ninja -$ cd .. ``` To create an executable, run `ninja clickhouse`. diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 3a6774037c1..3776c9b513f 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl To install GCC on Ubuntu run: `sudo apt install gcc g++` -Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9. +Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9. Mac OS X build is supported only for Clang. 
Just run `brew install llvm` @@ -245,7 +245,7 @@ The Code Style Guide: https://clickhouse.tech/docs/en/development/style/ Writing tests: https://clickhouse.tech/docs/en/development/tests/ -List of tasks: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md +List of tasks: https://github.com/ClickHouse/ClickHouse/contribute ## Test Data {#test-data} diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index ac23120b9cd..ee28bfda905 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -60,7 +60,7 @@ Engines in the family: - [Distributed](special/distributed.md#distributed) - [MaterializedView](special/materializedview.md#materializedview) - [Dictionary](special/dictionary.md#dictionary) -- [Merge](special/merge.md#merge +- [Merge](special/merge.md#merge) - [File](special/file.md#file) - [Null](special/null.md#null) - [Set](special/set.md#set) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 5826adb7e9c..33d12293172 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -41,8 +41,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2 ) ENGINE = MergeTree() +ORDER BY expr [PARTITION BY expr] -[ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] @@ -58,23 +58,27 @@ For a description of parameters, see the [CREATE query description](../../../sql - `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. -- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md). +- `ORDER BY` — The sorting key. + + A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. + + ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause. + + Use the `ORDER BY tuple()` syntax, if you don't need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). + +- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md). Optional. For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. -- `ORDER BY` — The sorting key. - - A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. - -- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). +- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause. -- `SAMPLE BY` — An expression for sampling. +- `SAMPLE BY` — An expression for sampling. Optional. If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. 
-- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). +- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. Expression must have one `Date` or `DateTime` column as a result. Example: `TTL date + INTERVAL 1 DAY` @@ -83,7 +87,7 @@ For a description of parameters, see the [CREATE query description](../../../sql For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) -- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`: +- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree` (optional): - `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage). - `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage). @@ -198,6 +202,10 @@ The number of columns in the primary key is not explicitly limited. Depending on A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries. +You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads). + +To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries. + ### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key} It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple. @@ -332,8 +340,8 @@ The `set` index can be used with all functions. 
Function subsets for other index |------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| | [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | | [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ | -| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✗ | ✗ | +| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | | [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | | [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | | [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | @@ -349,7 +357,8 @@ The `set` index can be used with all functions. Function subsets for other index Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. -Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example: +!!! note "Note" + Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example: - Can be optimized: - `s LIKE '%test%'` @@ -652,4 +661,3 @@ After the completion of background merges and mutations, old parts are removed o During this time, they are not moved to other volumes or disks. Therefore, until the parts are finally removed, they are still taken into account for evaluation of the occupied disk space. [Original article](https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/) - diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 423f7d1ef33..72316284e3b 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -31,6 +31,7 @@ For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tab ## Manipulations with Parts {#manipulations-with-parts} ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks to the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by ClickHouse server, so you can just leave them there: you will have a simple backup that doesn’t require any additional external system, but it will still be prone to hardware issues. For this reason, it’s better to remotely copy them to another location and then remove the local copies. Distributed filesystems and object stores are still a good options for this, but normal attached file servers with a large enough capacity might work as well (in this case the transfer will occur via the network filesystem or maybe [rsync](https://en.wikipedia.org/wiki/Rsync)). 
+Data can be restored from backup using the `ALTER TABLE ... ATTACH PARTITION ...` For more information about queries related to partition manipulations, see the [ALTER documentation](../sql-reference/statements/alter.md#alter_manipulations-with-partitions). diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 30ea1f2e562..e1f9e427413 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -18,7 +18,7 @@ If `replace` is specified, it replaces the entire element with the specified one If `remove` is specified, it deletes the element. -The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)). +The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)). Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. diff --git a/docs/en/operations/system-tables.md b/docs/en/operations/system-tables.md index d3d58834e60..7b76f737824 100644 --- a/docs/en/operations/system-tables.md +++ b/docs/en/operations/system-tables.md @@ -18,9 +18,11 @@ System tables: - Available only for reading data. - Can't be dropped or altered, but can be detached. -Most of system tables store their data in RAM. ClickHouse server creates such system tables at the start. +Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start. -The [metric_log](#system_tables-metric_log), [query_log](#system_tables-query_log), [query_thread_log](#system_tables-query_thread_log), [trace_log](#system_tables-trace_log) system tables store data in a storage filesystem. You can alter them or remove from a disk manually. If you remove one of that tables from a disk, the ClickHouse server creates the table again at the time of the next recording. A storage period for these tables is not limited, and ClickHouse server doesn't delete their data automatically. 
You need to organize removing of outdated logs by yourself. For example, you can use [TTL](../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. +Unlike other system tables, the system tables [metric_log](#system_tables-metric_log), [query_log](#system_tables-query_log), [query_thread_log](#system_tables-query_thread_log), [trace_log](#system_tables-trace_log) are served by [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. + +By default, table growth is unlimited. To control a size of a table, you can use [TTL](../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables. ### Sources of System Metrics {#system-tables-sources-of-system-metrics} @@ -636,9 +638,9 @@ You can change settings of queries logging in the [query_log](server-configurati You can disable queries logging by setting [log_queries = 0](settings/settings.md#settings-log-queries). We don't recommend to turn off logging because information in this table is important for solving issues. -The flushing period of logs is set in `flush_interval_milliseconds` parameter of the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing logs, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query. +The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query. -ClickHouse doesn't delete logs from the table automatically. See [Introduction](#system-tables-introduction) for more details. +ClickHouse doesn't delete data from the table automatically. See [Introduction](#system-tables-introduction) for more details. The `system.query_log` table registers two kinds of queries: @@ -766,68 +768,117 @@ Settings.Values: ['0','random','1','10000000000'] ## system.query_thread_log {#system_tables-query_thread_log} -The table contains information about each query execution thread. +Contains information about threads which execute queries, for example, thread name, thread start time, duration of query processing. -ClickHouse creates this table only if the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. +To start logging: -To enable query logging, set the [log\_query\_threads](settings/settings.md#settings-log-query-threads) parameter to 1. For details, see the [Settings](settings/settings.md) section. +1. Configure parameters in the [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section. +2. Set [log_query_threads](settings/settings.md#settings-log-query-threads) to 1. 
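For example, assuming the `query_thread_log` section is already present in the server config, a quick check from a client session might look like this (flushing is covered just below):

``` sql
SET log_query_threads = 1;      -- enable thread logging for the current session

SELECT 1;                       -- any query; its threads will be logged

SYSTEM FLUSH LOGS;              -- force buffered log data into the table

SELECT thread_name, query_duration_ms
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 5;
```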
+ +The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query. + +ClickHouse doesn't delete data from the table automatically. See [Introduction](#system-tables-introduction) for more details. Columns: -- `event_date` (Date) — the date when the thread has finished execution of the query. -- `event_time` (DateTime) — the date and time when the thread has finished execution of the query. -- `query_start_time` (DateTime) — Start time of query execution. -- `query_duration_ms` (UInt64) — Duration of query execution. -- `read_rows` (UInt64) — Number of read rows. -- `read_bytes` (UInt64) — Number of read bytes. -- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `memory_usage` (Int64) — The difference between the amount of allocated and freed memory in context of this thread. -- `peak_memory_usage` (Int64) — The maximum difference between the amount of allocated and freed memory in context of this thread. -- `thread_name` (String) — Name of the thread. -- `thread_number` (UInt32) — Internal thread ID. -- `os_thread_id` (Int32) — OS thread ID. -- `master_thread_id` (UInt64) — OS initial ID of initial thread. -- `query` (String) — Query string. -- `is_initial_query` (UInt8) — Query type. Possible values: +- `event_date` ([Date](../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. +- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. +- `query_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_duration_ms` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. +- `read_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. +- `read_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. +- `written_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. +- `written_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. +- `peak_memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. +- `thread_name` ([String](../sql-reference/data-types/string.md)) — Name of the thread. +- `thread_number` ([UInt32](../sql-reference/data-types/int-uint.md)) — Internal thread ID. +- `thread_id` ([Int32](../sql-reference/data-types/int-uint.md)) — thread ID. +- `master_thread_id` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. +- `query` ([String](../sql-reference/data-types/string.md)) — Query string. 
+- `is_initial_query` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: - 1 — Query was initiated by the client. - 0 — Query was initiated by another query for distributed query execution. -- `user` (String) — Name of the user who initiated the current query. -- `query_id` (String) — ID of the query. -- `address` (IPv6) — IP address that was used to make the query. -- `port` (UInt16) — The client port that was used to make the query. -- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` (String) — ID of the initial query (for distributed query execution). -- `initial_address` (IPv6) — IP address that the parent query was launched from. -- `initial_port` (UInt16) — The client port that was used to make the parent query. -- `interface` (UInt8) — Interface that the query was initiated from. Possible values: +- `user` ([String](../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. +- `query_id` ([String](../sql-reference/data-types/string.md)) — ID of the query. +- `address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. +- `initial_user` ([String](../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). +- `initial_query_id` ([String](../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). +- `initial_address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. +- `interface` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — OS’s username who runs [clickhouse-client](../interfaces/cli.md). -- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run. -- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. -- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. -- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: +- `os_user` ([String](../sql-reference/data-types/string.md)) — OS’s username who runs [clickhouse-client](../interfaces/cli.md). +- `client_hostname` ([String](../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../sql-reference/data-types/string.md)) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. 
+- `client_revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_major` ([UInt32](../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_minor` ([UInt32](../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_patch` ([UInt32](../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. +- `http_method` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values: - 0 — The query was launched from the TCP interface. - 1 — `GET` method was used. - 2 — `POST` method was used. -- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request. -- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`). -- `revision` (UInt32) — ClickHouse revision. -- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events) -- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column. +- `http_user_agent` ([String](../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. +- `quota_key` ([String](../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`). +- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ClickHouse revision. +- `ProfileEvents.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events). +- `ProfileEvents.Values` ([Array(UInt64)](../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column. -By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query. +**Example** -When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted. +``` sql + SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical +``` -!!! note "Note" - The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself. 
+``` text +Row 1: +────── +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 1 +read_bytes: 1 +written_rows: 0 +written_bytes: 0 +memory_usage: 0 +peak_memory_usage: 0 +thread_name: QueryPipelineEx +thread_id: 28952 +master_thread_id: 28924 +query: SELECT 1 +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] +ProfileEvents.Values: [1,97,81,5,81] +... +``` -You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server setting (see the `partition_by` parameter). +**See Also** + +- [system.query_log](#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. ## system.trace\_log {#system_tables-trace_log} diff --git a/docs/en/sql-reference/functions/comparison-functions.md b/docs/en/sql-reference/functions/comparison-functions.md index f03dc04a21a..0b6d8b6e36e 100644 --- a/docs/en/sql-reference/functions/comparison-functions.md +++ b/docs/en/sql-reference/functions/comparison-functions.md @@ -22,7 +22,7 @@ Strings are compared by bytes. A shorter string is smaller than all strings that ## equals, a = b and a == b operator {#function-equals} -## notEquals, a ! 
operator= b and a \<\> b {#function-notequals} +## notEquals, a != b and a \<\> b operator {#function-notequals} ## less, \< operator {#function-less} diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 9544998334f..e4823686c68 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -5,10 +5,13 @@ toc_title: SYSTEM # SYSTEM Queries {#query-language-system} +- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary) - [DROP DNS CACHE](#query_language-system-drop-dns-cache) - [DROP MARK CACHE](#query_language-system-drop-mark-cache) +- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache) +- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache) - [FLUSH LOGS](#query_language-system-flush_logs) - [RELOAD CONFIG](#query_language-system-reload-config) - [SHUTDOWN](#query_language-system-shutdown) @@ -18,7 +21,25 @@ toc_title: SYSTEM - [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) - [STOP MERGES](#query_language-system-stop-merges) - [START MERGES](#query_language-system-start-merges) +- [STOP TTL MERGES](#query_language-stop-ttl-merges) +- [START TTL MERGES](#query_language-start-ttl-merges) +- [STOP MOVES](#query_language-stop-moves) +- [START MOVES](#query_language-start-moves) +- [STOP FETCHES](#query_language-system-stop-fetches) +- [START FETCHES](#query_language-system-start-fetches) +- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends) +- [START REPLICATED SENDS](#query_language-system-start-replicated-sends) +- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues) +- [START REPLICATION QUEUES](#query_language-system-start-replication-queues) +- [SYNC REPLICA](#query_language-system-sync-replica) +- [RESTART REPLICA](#query_language-system-restart-replica) +- [RESTART REPLICAS](#query_language-system-restart-replicas) +## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries} +Reload all [Internal dictionaries](../dictionaries/internal-dicts.md). +By default, internal dictionaries are disabled. +Always returns `Ok.` regardless of the result of the internal dictionary update. + ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Reloads all dictionaries that have been successfully loaded before. @@ -45,6 +66,16 @@ For more convenient (automatic) cache management, see disable\_internal\_dns\_ca Resets the mark cache. Used in development of ClickHouse and performance tests. +## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache} + +Reset the uncompressed data cache. Used in development of ClickHouse and performance tests. +For manage uncompressed data cache parameters use following server level settings [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) and query/user/profile level settings [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) + + +## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache} +Reset the compiled expression cache. Used in development of ClickHouse and performance tests. 
+The compiled expression cache is used when the query/user/profile-level option [compile](../../operations/settings/settings.md#compile) is enabled.
+
 ## FLUSH LOGS {#query_language-system-flush_logs}
 
 Flushes buffers of log messages to system tables (e.g. system.query\_log). Allows you to not wait 7.5 seconds when debugging.
 
@@ -89,6 +120,10 @@ Enables background data distribution when inserting data into distributed tables
 SYSTEM START DISTRIBUTED SENDS [db.]
 ```
+## Managing MergeTree Tables {#query-language-system-mergetree}
+
+ClickHouse can manage background processes in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
+
 ### STOP MERGES {#query_language-system-stop-merges}
 
 Provides possibility to stop background merges for tables in the MergeTree family:
@@ -108,4 +143,110 @@ Provides possibility to start background merges for tables in the MergeTree fami
 SYSTEM START MERGES [[db.]merge_tree_family_table_name]
 ```
+### STOP TTL MERGES {#query_language-stop-ttl-merges}
+
+Provides possibility to stop background deletion of old data according to [TTL expressions](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family.
+Returns `Ok.` even if the table doesn't exist or the table doesn't have a MergeTree engine. Returns an error if the database doesn't exist:
+
+``` sql
+SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
+```
+
+### START TTL MERGES {#query_language-start-ttl-merges}
+
+Provides possibility to start background deletion of old data according to [TTL expressions](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family.
+Returns `Ok.` even if the table doesn't exist. Returns an error if the database doesn't exist:
+
+``` sql
+SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
+```
+
+### STOP MOVES {#query_language-stop-moves}
+
+Provides possibility to stop background moving of data according to a [TTL table expression with a TO VOLUME or TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
+Returns `Ok.` even if the table doesn't exist. Returns an error if the database doesn't exist:
+
+``` sql
+SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
+```
+
+### START MOVES {#query_language-start-moves}
+
+Provides possibility to start background moving of data according to a [TTL table expression with a TO VOLUME or TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
+Returns `Ok.` even if the table doesn't exist. Returns an error if the database doesn't exist:
+
+``` sql
+SYSTEM START MOVES [[db.]merge_tree_family_table_name]
+```
+
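As a usage sketch for this group of statements (`db.visits` is a placeholder table name), background merges and TTL-based moves might be paused around maintenance like this:

``` sql
-- Pause background merges and TTL-based moves for one table
SYSTEM STOP MERGES db.visits;
SYSTEM STOP MOVES db.visits;

-- ... perform maintenance on the table ...

SYSTEM START MERGES db.visits;
SYSTEM START MOVES db.visits;
```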
+## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}
+
+ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables.
+
+### STOP FETCHES {#query_language-system-stop-fetches}
+Provides possibility to stop background fetches of inserted parts for tables in the `ReplicatedMergeTree` family.
+Always returns `Ok.` regardless of the table engine, and even if the table or database doesn't exist.
+
+``` sql
+SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
+```
+
+### START FETCHES {#query_language-system-start-fetches}
+Provides possibility to start background fetches of inserted parts for tables in the `ReplicatedMergeTree` family.
+Always returns `Ok.` regardless of the table engine, and even if the table or database doesn't exist.
+
+``` sql
+SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
+```
+
+### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
+Provides possibility to stop background sends of newly inserted parts to other replicas in the cluster for tables in the `ReplicatedMergeTree` family:
+
+``` sql
+SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+```
+
+### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
+Provides possibility to start background sends of newly inserted parts to other replicas in the cluster for tables in the `ReplicatedMergeTree` family:
+
+``` sql
+SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+```
+
+### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
+Provides possibility to stop background tasks from the replication queues stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. Possible background task types: merges, fetches, mutations, and DDL statements with the `ON CLUSTER` clause:
+
+``` sql
+SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+```
+
+### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
+Provides possibility to start background tasks from the replication queues stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. Possible background task types: merges, fetches, mutations, and DDL statements with the `ON CLUSTER` clause:
+
+``` sql
+SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+```
+
+### SYNC REPLICA {#query_language-system-sync-replica}
+Waits until a `ReplicatedMergeTree` table is synced with the other replicas in the cluster. If fetches are currently disabled for the table, the query runs until `receive_timeout` expires.
+
+``` sql
+SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
+```
+
+### RESTART REPLICA {#query_language-system-restart-replica}
+Provides possibility to reinitialize the ZooKeeper session state for a `ReplicatedMergeTree` table: the current state is compared with ZooKeeper as the source of truth, and tasks are added to the ZooKeeper queue if needed.
+Initialization of the replication queue based on ZooKeeper data happens in the same way as for the `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations.
+ +``` sql +SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name +``` + +### RESTART REPLICAS {#query_language-system-restart-replicas} +Provides possibility to reinitialize Zookeeper sessions state for all `ReplicatedMergeTree` tables, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed + +``` sql +SYSTEM RESTART QUEUES [db.]replicated_merge_tree_family_table_name +``` + [Original article](https://clickhouse.tech/docs/en/query_language/system/) diff --git a/docs/es/development/developer-instruction.md b/docs/es/development/developer-instruction.md index 9c56abe33a1..8f21e851b89 100644 --- a/docs/es/development/developer-instruction.md +++ b/docs/es/development/developer-instruction.md @@ -141,7 +141,7 @@ Las compilaciones oficiales de Yandex actualmente usan GCC porque genera código Para instalar GCC en Ubuntu, ejecute: `sudo apt install gcc g++` -Compruebe la versión de gcc: `gcc --version`. Si está por debajo de 9, siga las instrucciones aquí: https://clickhouse .tech/docs/en/development/build/\#install-gcc-9. +Compruebe la versión de gcc: `gcc --version`. Si está por debajo de 9, siga las instrucciones aquí: https://clickhouse.tech/docs/es/development/build/#install-gcc-9. La compilación de Mac OS X solo es compatible con Clang. Sólo tiene que ejecutar `brew install llvm` @@ -249,7 +249,7 @@ La Guía de estilo de código: https://clickhouse.tech/docs/en/development/style Pruebas de escritura: https://clickhouse.tech/docs/en/development/tests/ -Lista de tareas: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md +Lista de tareas: https://github.com/ClickHouse/ClickHouse/contribute # Datos de prueba {#test-data} diff --git a/docs/fa/development/developer-instruction.md b/docs/fa/development/developer-instruction.md index cbc4734cec8..287960aff04 100644 --- a/docs/fa/development/developer-instruction.md +++ b/docs/fa/development/developer-instruction.md @@ -143,7 +143,7 @@ toc_title: "\u062F\u0633\u062A\u0648\u0631\u0627\u0644\u0639\u0645\u0644 \u062A\ برای نصب شورای همکاری خلیج فارس در اوبونتو اجرای: `sudo apt install gcc g++` -بررسی نسخه شورای همکاری خلیج فارس: `gcc --version`. اگر زیر است 9, سپس دستورالعمل اینجا را دنبال کنید: https://clickhouse.فناوری / اسناد / ارتباطات / توسعه/ساختن / \#نصب شورای همکاری خلیج فارس-9. +بررسی نسخه شورای همکاری خلیج فارس: `gcc --version`. اگر زیر است 9, سپس دستورالعمل اینجا را دنبال کنید: https://clickhouse.tech/docs/fa/development/build/#install-gcc-9. سیستم عامل مک ایکس ساخت فقط برای صدای جرنگ جرنگ پشتیبانی می شود. فقط فرار کن `brew install llvm` @@ -251,7 +251,7 @@ KDevelop و QTCreator دیگر از جایگزین های بسیار خوبی ا تست نوشتن: https://clickhouse.فناوری / اسناد/توسعه/تست/ -فهرست تکلیفها: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md +فهرست تکلیفها: https://github.com/ClickHouse/ClickHouse/contribute # داده های تست {#test-data} diff --git a/docs/fr/development/developer-instruction.md b/docs/fr/development/developer-instruction.md index 414cfc1d339..a20066fa3f7 100644 --- a/docs/fr/development/developer-instruction.md +++ b/docs/fr/development/developer-instruction.md @@ -141,7 +141,7 @@ Les builds officiels de Yandex utilisent actuellement GCC car ils génèrent du Pour installer GCC sur Ubuntu Exécutez: `sudo apt install gcc g++` -Vérifiez la version de gcc: `gcc --version`. 
Si elle est inférieure à 9, suivez les instructions ici: https://clickhouse.tech/docs/fr/développement/construction/\#install-gcc-9. +Vérifiez la version de gcc: `gcc --version`. Si elle est inférieure à 9, suivez les instructions ici: https://clickhouse.tech/docs/fr/development/build/#install-gcc-9. Mac OS X build est pris en charge uniquement pour Clang. Il suffit d'exécuter `brew install llvm` @@ -249,7 +249,7 @@ Le code Style Guide: https://clickhouse.tech/docs/fr/développement/style/ Rédaction de tests: https://clickhouse.tech/docs/fr/développement/tests/ -Liste des tâches: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md +Liste des tâches: https://github.com/ClickHouse/ClickHouse/contribute # Des Données De Test {#test-data} diff --git a/docs/ja/development/developer-instruction.md b/docs/ja/development/developer-instruction.md index d65b25bd98c..6441e77185f 100644 --- a/docs/ja/development/developer-instruction.md +++ b/docs/ja/development/developer-instruction.md @@ -141,7 +141,7 @@ ClickHouseのビルドには、バージョン9以降のGCCとClangバージョ UBUNTUにGCCをインストールするには: `sudo apt install gcc g++` -Gccのバージョンを確認する: `gcc --version`. の場合は下記9その指示に従う。https://clickhouse.tech/docs/en/development/build/\#install-gcc-9. +Gccのバージョンを確認する: `gcc --version`. の場合は下記9その指示に従う。https://clickhouse.tech/docs/ja/development/build/#install-gcc-9. Mac OS XのビルドはClangでのみサポートされています。 ちょうど実行 `brew install llvm` @@ -249,7 +249,7 @@ KDevelopとQTCreatorは、ClickHouseを開発するためのIDEの他の優れ 筆記試験:https://clickhouse.tech/docs/en/development/tests/ -タスクのリスト:https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md +タスクのリスト:https://github.com/ClickHouse/ClickHouse/contribute # テストデータ {#test-data} diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 11ac3a73f6e..775b156dff5 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -135,7 +135,7 @@ ClickHouse использует для сборки некоторое коли Для установки GCC под Ubuntu, выполните: `sudo apt install gcc g++`. -Проверьте версию gcc: `gcc --version`. Если версия меньше 9, то следуйте инструкции: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9 +Проверьте версию gcc: `gcc --version`. Если версия меньше 9, то следуйте инструкции: https://clickhouse.tech/docs/ru/development/build/#install-gcc-9. Сборка под Mac OS X поддерживается только для компилятора Clang. Чтобы установить его выполните `brew install llvm` @@ -244,7 +244,7 @@ Mac OS X: Разработка тестов: https://clickhouse.tech/docs/ru/development/tests/ -Список задач: https://github.com/ClickHouse/ClickHouse/blob/master/tests/instructions/easy\_tasks\_sorted\_ru.md +Список задач: https://github.com/ClickHouse/ClickHouse/contribute # Тестовые данные {#testovye-dannye} diff --git a/docs/ru/operations/system-tables.md b/docs/ru/operations/system-tables.md index 38971d6ee99..6e57e7a63f3 100644 --- a/docs/ru/operations/system-tables.md +++ b/docs/ru/operations/system-tables.md @@ -593,15 +593,9 @@ CurrentMetric_ReplicatedChecks: 0 Можно отключить логгирование настройкой [log_queries = 0](settings/settings.md#settings-log-queries). По-возможности, не отключайте логгирование, поскольку информация из таблицы важна при решении проблем. -Период сброса логов в таблицу задаётся параметром `flush_interval_milliseconds` в конфигурационной секции [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log). 
Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs). +Период сброса данных в таблицу задаётся параметром `flush_interval_milliseconds` в конфигурационной секции [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs). -ClickHouse не удаляет логи из таблица автоматически. Смотрите [Введение](#system-tables-introduction). - -Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) (параметр `partition_by`). - - - -Если таблицу удалить вручную, она создается заново автоматически «на лету». При этом все логи на момент удаления таблицы будут убраны. +ClickHouse не удаляет данные из таблица автоматически. Смотрите [Введение](#system-tables-introduction). Таблица `system.query_log` содержит информацию о двух видах запросов: @@ -729,71 +723,116 @@ Settings.Values: ['0','random','1','10000000000'] ## system.query_thread_log {#system_tables-query_thread_log} -Содержит информацию о каждом потоке выполняемых запросов. +Содержит информацию о потоках, которые выполняют запросы, например, имя потока, время его запуска, продолжительность обработки запроса. -ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы. +Чтобы начать логирование: -Чтобы включить логирование, задайте значение параметра [log\_query\_threads](settings/settings.md#settings-log-query-threads) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md#settings). +1. Настройте параметры [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) в конфигурации сервера. +2. Установите значение [log_query_threads](settings/settings.md#settings-log-query-threads) равным 1. + +Интервал сброса данных в таблицу задаётся параметром `flush_interval_milliseconds` в разделе настроек сервера [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). Чтобы принудительно записать логи из буфера памяти в таблицу, используйте запрос [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs). + +ClickHouse не удаляет данные из таблицы автоматически. Подробности в разделе [Введение](#system-tables-introduction). Столбцы: -- `event_date` (Date) — дата завершения выполнения запроса потоком. -- `event_time` (DateTime) — дата и время завершения выполнения запроса потоком. -- `query_start_time` (DateTime) — время начала обработки запроса. -- `query_duration_ms` (UInt64) — длительность обработки запроса в миллисекундах. -- `read_rows` (UInt64) — количество прочитанных строк. -- `read_bytes` (UInt64) — количество прочитанных байтов. -- `written_rows` (UInt64) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. -- `written_bytes` (UInt64) — объём записанных данных в байтах для запросов `INSERT`. 
Для других запросов, значение столбца 0. -- `memory_usage` (Int64) — разница между выделенной и освобождённой памятью в контексте потока. -- `peak_memory_usage` (Int64) — максимальная разница между выделенной и освобождённой памятью в контексте потока. -- `thread_name` (String) — Имя потока. -- `thread_id` (UInt64) — tid (ID потока операционной системы). -- `master_thread_id` (UInt64) — tid (ID потока операционной системы) главного потока. -- `query` (String) — текст запроса. -- `is_initial_query` (UInt8) — вид запроса. Возможные значения: +- `event_date` ([Date](../sql-reference/data-types/date.md)) — дата завершения выполнения запроса потоком. +- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения запроса потоком. +- `query_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — время начала обработки запроса. +- `query_duration_ms` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — длительность обработки запроса в миллисекундах. +- `read_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных строк. +- `read_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных байтов. +- `written_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. +- `written_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — объём записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. +- `memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — разница между выделенной и освобождённой памятью в контексте потока. +- `peak_memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока. +- `thread_name` ([String](../sql-reference/data-types/string.md)) — Имя потока. +- `thread_id` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — tid (ID потока операционной системы). +- `master_thread_id` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — tid (ID потока операционной системы) главного потока. +- `query` ([String](../sql-reference/data-types/string.md)) — текст запроса. +- `is_initial_query` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — вид запроса. Возможные значения: - 1 — запрос был инициирован клиентом. - 0 — запрос был инициирован другим запросом при распределенном запросе. -- `user` (String) — пользователь, запустивший текущий запрос. -- `query_id` (String) — ID запроса. -- `address` (IPv6) — IP адрес, с которого пришел запрос. -- `port` (UInt16) — порт, с которого пришел запрос. -- `initial_user` (String) — пользователь, запустивший первоначальный запрос (для распределенных запросов). -- `initial_query_id` (String) — ID родительского запроса. -- `initial_address` (IPv6) — IP адрес, с которого пришел родительский запрос. -- `initial_port` (UInt16) — порт, пришел родительский запрос. -- `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения: +- `user` ([String](../sql-reference/data-types/string.md)) — пользователь, запустивший текущий запрос. +- `query_id` ([String](../sql-reference/data-types/string.md)) — ID запроса. +- `address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел запрос. +- `port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — порт, с которого пришел запрос. 
+- `initial_user` ([String](../sql-reference/data-types/string.md)) — пользователь, запустивший первоначальный запрос (для распределенных запросов). +- `initial_query_id` ([String](../sql-reference/data-types/string.md)) — ID родительского запроса. +- `initial_address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел родительский запрос. +- `initial_port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — порт, пришел родительский запрос. +- `interface` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — интерфейс, с которого ушёл запрос. Возможные значения: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — имя пользователя в OS, который запустил [clickhouse-client](../interfaces/cli.md). -- `client_hostname` (String) — hostname клиентской машины, с которой присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. -- `client_name` (String) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. -- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `client_version_minor` (UInt32) — младшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `client_version_patch` (UInt32) — патч [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `http_method` (UInt8) — HTTP метод, инициировавший запрос. Возможные значения: +- `os_user` ([String](../sql-reference/data-types/string.md)) — имя пользователя в OS, который запустил [clickhouse-client](../interfaces/cli.md). +- `client_hostname` ([String](../sql-reference/data-types/string.md)) — hostname клиентской машины, с которой присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_name` ([String](../sql-reference/data-types/string.md)) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_major` ([UInt32](../sql-reference/data-types/int-uint.md)) — старшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_minor` ([UInt32](../sql-reference/data-types/int-uint.md)) — младшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_patch` ([UInt32](../sql-reference/data-types/int-uint.md)) — патч [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `http_method` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP метод, инициировавший запрос. Возможные значения: - 0 — запрос запущен с интерфейса TCP. - 1 — `GET`. - 2 — `POST`. -- `http_user_agent` (String) — HTTP заголовок `UserAgent`. -- `quota_key` (String) — «ключ квоты» из настроек [квот](quotas.md) (см. `keyed`). -- `revision` (UInt32) — ревизия ClickHouse. -- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events)(\#system\_tables-events -- `ProfileEvents.Values` (Array(UInt64)) — метрики для данного потока, перечисленные в столбце `ProfileEvents.Names`. +- `http_user_agent` ([String](../sql-reference/data-types/string.md)) — HTTP заголовок `UserAgent`. 
+- `quota_key` ([String](../sql-reference/data-types/string.md)) — «ключ квоты» из настроек [квот](quotas.md) (см. `keyed`). +- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ревизия ClickHouse. +- `ProfileEvents.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events). +- `ProfileEvents.Values` ([Array(UInt64)](../sql-reference/data-types/array.md)) — метрики для данного потока, перечисленные в столбце `ProfileEvents.Names`. -По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`. +**Пример** -Если таблицу удалить вручную, она пересоздастся автоматически «на лету». При этом все логи на момент удаления таблицы будут удалены. +``` sql + SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical +``` -!!! note "Примечание" - Срок хранения логов не ограничен. Логи не удаляются из таблицы автоматически. Вам необходимо самостоятельно организовать удаление устаревших логов. +``` text +Row 1: +────── +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 1 +read_bytes: 1 +written_rows: 0 +written_bytes: 0 +memory_usage: 0 +peak_memory_usage: 0 +thread_name: QueryPipelineEx +thread_id: 28952 +master_thread_id: 28924 +query: SELECT 1 +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] +ProfileEvents.Values: [1,97,81,5,81] +... +``` -Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) (параметр `partition_by`). +**Смотрите также** -## system.query_thread_log {#system_tables-query_thread_log} - -Содержит информацию о каждом потоке исполнения запроса. +- [system.query_log](#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. 
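+Ещё один пример: агрегация по `query_id` показывает, сколько потоков обрабатывало каждый запрос за сегодня (набросок, использующий только столбцы, описанные выше):
+
+``` sql
+SELECT
+    query_id,
+    count() AS threads,
+    max(peak_memory_usage) AS max_peak_memory_usage
+FROM system.query_thread_log
+WHERE event_date = today()
+GROUP BY query_id
+ORDER BY threads DESC
+LIMIT 10
+```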
## system.trace\_log {#system_tables-trace_log} diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md index 3534fc4e48a..1f09eb28d2e 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md @@ -32,7 +32,7 @@ ClickHouse поддерживает иерархические словари с ClickHouse поддерживает свойство [hierarchical](external-dicts-dict-structure.md#hierarchical-dict-attr) для атрибутов [внешнего словаря](index.md). Это свойство позволяет конфигурировать словари, подобные описанному выше. -С помощью функции [dictGetHierarchy](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md#dictgethierarchy) можно получить цепочку предков элемента. +С помощью функции [dictGetHierarchy](../../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy) можно получить цепочку предков элемента. Структура словаря для нашего примера может выглядеть следующим образом: diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 9256fab5e0c..368da949dc8 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -2,7 +2,7 @@ Словари можно размещать в памяти множеством способов. -Рекомендуем [flat](#flat), [hashed](#hashed) и [complex\_key\_hashed](#complex-key-hashed). Скорость обработки словарей при этом максимальна. +Рекомендуем [flat](#flat), [hashed](#dicts-external_dicts_dict_layout-hashed) и [complex\_key\_hashed](#complex-key-hashed). Скорость обработки словарей при этом максимальна. Размещение с кэшированием не рекомендуется использовать из-за потенциально низкой производительности и сложностей в подборе оптимальных параметров. Читайте об этом подробнее в разделе «[cache](#cache)». @@ -34,7 +34,7 @@ ``` -Соответствущий [DDL-запрос](../../../sql-reference/statements/create.md#create-dictionary-query): +Соответствущий [DDL-запрос](../../statements/create.md#create-dictionary-query): ``` sql CREATE DICTIONARY (...) @@ -46,7 +46,7 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings ## Способы размещения словарей в памяти {#sposoby-razmeshcheniia-slovarei-v-pamiati} - [flat](#flat) -- [hashed](#hashed) +- [hashed](#dicts-external_dicts_dict_layout-hashed) - [sparse\_hashed](#dicts-external_dicts_dict_layout-sparse_hashed) - [cache](#cache) - [direct](#direct) @@ -80,7 +80,7 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings LAYOUT(FLAT()) ``` -### hashed {#hashed} +### hashed {#dicts-external_dicts_dict_layout-hashed} Словарь полностью хранится в оперативной памяти в виде хэш-таблиц. Словарь может содержать произвольное количество элементов с произвольными идентификаторами. На практике, количество ключей может достигать десятков миллионов элементов. 
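+Минимальный набросок задания размещения `hashed` в DDL-запросе (остальная часть запроса `CREATE DICTIONARY` опущена; предполагается, что она такая же, как в примерах выше):
+
+``` sql
+LAYOUT(HASHED())
+```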
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 4190e8e1015..e5b20f3960c 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -19,7 +19,7 @@ ``` -Аналогичный [DDL-запрос](../../../sql-reference/statements/create.md#create-dictionary-query): +Аналогичный [DDL-запрос](../../statements/create.md#create-dictionary-query): ``` sql CREATE DICTIONARY dict_name (...) @@ -150,7 +150,7 @@ SOURCE(HTTP( )) ``` -Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо [настроить openSSL](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) в конфигурации сервера. +Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо [настроить openSSL](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl) в конфигурации сервера. Поля настройки: @@ -531,7 +531,7 @@ SOURCE(CLICKHOUSE( Поля настройки: -- `host` — хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа [Distributed](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) и прописать её в дальнейших настройках. +- `host` — хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа [Distributed](../../../engines/table-engines/special/distributed.md) и прописать её в дальнейших настройках. - `port` — порт сервера ClickHouse. - `user` — имя пользователя ClickHouse. - `password` — пароль пользователя ClickHouse. diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 27702959eac..4c3b4eb22e4 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -154,7 +154,7 @@ CREATE DICTIONARY somename ( | Тег | Описание | Обязательный | |------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | `name` | Имя столбца. | Да | -| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`. [Nullable](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) не поддерживается. | Да | +| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`. [Nullable](../../../sql-reference/data-types/nullable.md) не поддерживается. | Да | | `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Нельзя указать значение `NULL`. | Да | | `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.
Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.

Значение по умолчанию: нет выражения. | Нет | | `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Default value: `false`. | No | @@ -162,6 +162,6 @@ CREATE DICTIONARY somename ( ## Смотрите также {#smotrite-takzhe} -- [Функции для работы с внешними словарями](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). +- [Функции для работы с внешними словарями](../../../sql-reference/functions/ext-dict-functions.md). [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/dicts/external_dicts_dict_structure/) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index 9eb6c8d8d86..a7d3394864b 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -24,7 +24,7 @@ XML-конфигурация словаря имеет следующую стр ``` -Соответствующий [DDL-запрос](../../../sql-reference/statements/create.md#create-dictionary-query) имеет следующий вид: +Соответствующий [DDL-запрос](../../statements/create.md#create-dictionary-query) имеет следующий вид: ``` sql CREATE DICTIONARY dict_name diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 7442a5dd3be..80f717dfe93 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -5,11 +5,11 @@ ClickHouse: - Полностью или частично хранит словари в оперативной памяти. - Периодически обновляет их и динамически подгружает отсутствующие значения. -- Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../../../sql-reference/statements/create.md#create-dictionary-query). +- Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../../statements/create.md#create-dictionary-query). -Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries\_config](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries\_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config). -Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries\_lazy\_load](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries\_lazy\_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load). Системная таблица [system.dictionaries](../../../operations/system-tables.md#system_tables-dictionaries) содержит информацию о словарях, сконфигурированных на сервере. Для каждого словаря там можно найти: @@ -41,10 +41,10 @@ ClickHouse: В одном файле можно [сконфигурировать](external-dicts-dict.md) произвольное количество словарей. -Если вы создаёте внешние словари [DDL-запросами](../../../sql-reference/statements/create.md#create-dictionary-query), то не задавайте конфигурацию словаря в конфигурации сервера. 
+Если вы создаёте внешние словари [DDL-запросами](../../statements/create.md#create-dictionary-query), то не задавайте конфигурацию словаря в конфигурации сервера. !!! attention "Внимание" - Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)). Эта функциональность не связана с внешними словарями. + Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../../../sql-reference/functions/other-functions.md)). Эта функциональность не связана с внешними словарями. ## Смотрите также {#ext-dicts-see-also} @@ -53,6 +53,6 @@ ClickHouse: - [Обновление словарей](external-dicts-dict-lifetime.md) - [Источники внешних словарей](external-dicts-dict-sources.md) - [Ключ и поля словаря](external-dicts-dict-structure.md) -- [Функции для работы с внешними словарями](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md#ext_dict_functions) +- [Функции для работы с внешними словарями](../../../sql-reference/functions/ext-dict-functions.md) [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/dicts/external_dicts/) diff --git a/docs/ru/sql-reference/index.md b/docs/ru/sql-reference/index.md index a13e3774b86..ea611e75995 100644 --- a/docs/ru/sql-reference/index.md +++ b/docs/ru/sql-reference/index.md @@ -10,7 +10,7 @@ toc_title: hidden - [SELECT](statements/select/index.md) - [INSERT INTO](statements/insert-into.md) - [CREATE](statements/create.md) -- [ALTER](statements/alter.md) +- [ALTER](statements/alter.md#query_language_queries_alter) - [Прочие виды запросов](statements/misc.md) [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/) diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index b058739c894..1b66fa039d9 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -1,9 +1,12 @@ # Запросы SYSTEM {#query-language-system} +- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary) - [DROP DNS CACHE](#query_language-system-drop-dns-cache) - [DROP MARK CACHE](#query_language-system-drop-mark-cache) +- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache) +- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache) - [FLUSH LOGS](#query_language-system-flush_logs) - [RELOAD CONFIG](#query_language-system-reload-config) - [SHUTDOWN](#query_language-system-shutdown) @@ -13,7 +16,25 @@ - [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) - [STOP MERGES](#query_language-system-stop-merges) - [START MERGES](#query_language-system-start-merges) +- [STOP TTL MERGES](#query_language-stop-ttl-merges) +- [START TTL MERGES](#query_language-start-ttl-merges) +- [STOP MOVES](#query_language-stop-moves) +- [START MOVES](#query_language-start-moves) +- [STOP FETCHES](#query_language-system-stop-fetches) +- [START FETCHES](#query_language-system-start-fetches) +- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends) +- [START REPLICATED SENDS](#query_language-system-start-replicated-sends) +- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues) +- [START REPLICATION QUEUES](#query_language-system-start-replication-queues) 
+- [SYNC REPLICA](#query_language-system-sync-replica) +- [RESTART REPLICA](#query_language-system-restart-replica) +- [RESTART REPLICAS](#query_language-system-restart-replicas) +## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries} +Перегружает все [встроенные словари](../dictionaries/internal-dicts.md). +По умолчанию встроенные словари выключены. +Всегда возвращает `Ok.`, вне зависимости от результата обновления встроенных словарей. + ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Перегружает все словари, которые были успешно загружены до этого. @@ -40,6 +61,16 @@ SELECT name, status FROM system.dictionaries; Сбрасывает кеш «засечек» (`mark cache`). Используется при разработке ClickHouse и тестах производительности. +## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache} + +Сбрасывает кеш несжатых данных. Используется при разработке ClickHouse и тестах производительности. +Для управления кешем несжатых данных используйте настройку уровня сервера [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) и настройку уровня запрос/пользователь/профиль [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache). + + +## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache} +Сбрасывает кеш скомпилированных выражений. Используется при разработке ClickHouse и тестах производительности. +Скомпилированные выражения используются, когда включена настройка уровня запрос/пользователь/профиль [compile](../../operations/settings/settings.md#compile). + ## FLUSH LOGS {#query_language-system-flush_logs} Записывает буферы логов в системные таблицы (например system.query\_log). Позволяет не ждать 7.5 секунд при отладке. @@ -84,6 +115,10 @@ SYSTEM FLUSH DISTRIBUTED [db.] SYSTEM START DISTRIBUTED SENDS [db.] ``` +## Managing MergeTree Tables {#query-language-system-mergetree} + +ClickHouse может управлять фоновыми процессами в таблицах семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). + ### STOP MERGES {#query_language-system-stop-merges} Позволяет остановить фоновые мержи для таблиц семейства MergeTree: @@ -103,4 +138,110 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name] SYSTEM START MERGES [[db.]merge_tree_family_table_name] ``` +### STOP TTL MERGES {#query_language-stop-ttl-merges} + +Позволяет остановить фоновые процессы удаления старых данных, основанные на [выражениях TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl), для таблиц семейства MergeTree. +Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. Возвращает ошибку, если указана несуществующая база данных: + +``` sql +SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name] +``` + +### START TTL MERGES {#query_language-start-ttl-merges} + +Запускает фоновые процессы удаления старых данных, основанные на [выражениях TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl), для таблиц семейства MergeTree. +Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. 
Возвращает ошибку, если указана несуществующая база данных: + +``` sql +SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name] +``` + +### STOP MOVES {#query_language-stop-moves} + +Позволяет остановить фоновые процессы переноса данных, основанные на [табличных выражениях TTL с использованием TO VOLUME или TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl), для таблиц семейства MergeTree. +Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. Возвращает ошибку, если указана несуществующая база данных: + +``` sql +SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] +``` + +### START MOVES {#query_language-start-moves} + +Запускает фоновые процессы переноса данных, основанные на [табличных выражениях TTL с использованием TO VOLUME или TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl), для таблиц семейства MergeTree. +Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. Возвращает ошибку, если указана несуществующая база данных: + +``` sql +SYSTEM START MOVES [[db.]merge_tree_family_table_name] +``` + +## Managing ReplicatedMergeTree Tables {#query-language-system-replicated} + +ClickHouse может управлять фоновыми процессами, связанными с репликацией, в таблицах семейства [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md). + +### STOP FETCHES {#query_language-system-stop-fetches} +Позволяет остановить фоновые процессы загрузки новых вставленных кусков данных с других реплик в кластере для таблиц семейства `ReplicatedMergeTree`. +Всегда возвращает `Ok.` вне зависимости от типа таблицы и даже если таблица или база данных не существует. + +``` sql +SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name] +``` + +### START FETCHES {#query_language-system-start-fetches} +Позволяет запустить фоновые процессы загрузки новых вставленных кусков данных с других реплик в кластере для таблиц семейства `ReplicatedMergeTree`. +Всегда возвращает `Ok.` вне зависимости от типа таблицы и даже если таблица или база данных не существует. + +``` sql +SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name] +``` + +### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends} +Позволяет остановить фоновые процессы отсылки новых вставленных кусков данных другим репликам в кластере для таблиц семейства `ReplicatedMergeTree`: + +``` sql +SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name] +``` + +### START REPLICATED SENDS {#query_language-system-start-replicated-sends} +Позволяет запустить фоновые процессы отсылки новых вставленных кусков данных другим репликам в кластере для таблиц семейства `ReplicatedMergeTree`: + +``` sql +SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name] +``` + +### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues} +Останавливает фоновые процессы разбора заданий из очереди репликации, которая хранится в ZooKeeper, для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER: + +``` sql +SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] +``` + +### START REPLICATION QUEUES {#query_language-system-start-replication-queues} +Запускает фоновые процессы разбора заданий из очереди репликации, которая хранится в ZooKeeper, для таблиц семейства `ReplicatedMergeTree`. 
Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER: + +``` sql +SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] +``` + +### SYNC REPLICA {#query_language-system-sync-replica} +Ждет, когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере. Если синхронизация для таблицы в данный момент отключена, запрос будет выполняться до достижения `receive_timeout`: + +``` sql +SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name +``` + +### RESTART REPLICA {#query_language-system-restart-replica} +Реинициализирует состояние сессий ZooKeeper для таблицы семейства `ReplicatedMergeTree`: сравнивает текущее состояние с тем, что хранится в ZooKeeper (как с источником правды), и при необходимости добавляет задачи в очередь репликации в ZooKeeper. +Инициализация очереди репликации на основе данных ZooKeeper происходит так же, как при `ATTACH TABLE`. На короткое время таблица станет недоступной для любых операций. + +``` sql +SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name +``` + +### RESTART REPLICAS {#query_language-system-restart-replicas} +Реинициализирует состояние сессий ZooKeeper для всех таблиц семейства `ReplicatedMergeTree`: сравнивает текущее состояние с тем, что хранится в ZooKeeper (как с источником правды), и при необходимости добавляет задачи в очередь репликации в ZooKeeper. + +``` sql +SYSTEM RESTART REPLICAS +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/system/) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index d9ea19ff389..570dcf0aaf2 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -2,7 +2,7 @@ Babel==2.8.0 backports-abc==0.5 backports.functools-lru-cache==1.6.1 beautifulsoup4==4.9.1 -certifi==2020.4.5.1 +certifi==2020.4.5.2 chardet==3.0.4 click==7.1.2 closure==20191111 @@ -13,7 +13,7 @@ idna==2.9 Jinja2==2.11.2 jinja2-highlight==0.6.1 jsmin==2.2.2 -livereload==2.6.1 +livereload==2.6.2 Markdown==3.2.1 MarkupSafe==1.1.1 mkdocs==1.1.2 @@ -22,7 +22,7 @@ mkdocs-macros-plugin==0.4.9 nltk==3.5 nose==1.3.7 protobuf==3.12.2 -numpy==1.18.4 +numpy==1.18.5 Pygments==2.5.2 pymdown-extensions==7.1 python-slugify==1.2.6 diff --git a/docs/tools/translate/requirements.txt b/docs/tools/translate/requirements.txt index 3c212ee8bc2..0c9d44a346e 100644 --- a/docs/tools/translate/requirements.txt +++ b/docs/tools/translate/requirements.txt @@ -1,5 +1,5 @@ Babel==2.8.0 -certifi==2020.4.5.1 +certifi==2020.4.5.2 chardet==3.0.4 googletrans==2.4.0 idna==2.9 diff --git a/docs/tr/development/developer-instruction.md b/docs/tr/development/developer-instruction.md index a65c6666288..0ca5f9cdd63 100644 --- a/docs/tr/development/developer-instruction.md +++ b/docs/tr/development/developer-instruction.md @@ -141,7 +141,7 @@ Resmi Yandex şu anda GCC'Yİ kullanıyor çünkü biraz daha iyi performansa sa Ubuntu run GCC yüklemek için: `sudo apt install gcc g++` -Gcc sürümünü kontrol edin: `gcc --version`. 9'un altındaysa, buradaki talimatları izleyin: https://clickhouse.tech / docs/TR/development / build / \#ınstall-gcc-9. +Gcc sürümünü kontrol edin: `gcc --version`. 9'un altındaysa, buradaki talimatları izleyin: https://clickhouse.tech/docs/tr/development/build/#install-gcc-9. Mac OS X build sadece Clang için desteklenir. 
Sadece koş `brew install llvm` @@ -249,7 +249,7 @@ Kod stili Kılavuzu: https://clickhouse.tech / doscs / TR / development / style/ Yazma testleri: https://clickhouse.teknoloji / doscs / TR / geliştirme / testler/ -Görevlerin listesi: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md +Görevlerin listesi: https://github.com/ClickHouse/ClickHouse/contribute # Test Verileri {#test-data} diff --git a/docs/zh/development/developer-instruction.md b/docs/zh/development/developer-instruction.md index 6911a0e4dc9..b40e6db3af1 100644 --- a/docs/zh/development/developer-instruction.md +++ b/docs/zh/development/developer-instruction.md @@ -129,7 +129,7 @@ Yandex官方当前使用GCC构建ClickHouse,因为它生成的机器代码性 在Ubuntu上安装GCC,请执行:`sudo apt install gcc g++` -请使用`gcc --version`查看gcc的版本。如果gcc版本低于9,请参考此处的指示:https://clickhouse.tech/docs/en/development/build/\#install-gcc-9 。 +请使用`gcc --version`查看gcc的版本。如果gcc版本低于9,请参考此处的指示:https://clickhouse.tech/docs/zh/development/build/#an-zhuang-gcc-9 。 在Mac OS X上安装GCC,请执行:`brew install gcc` @@ -234,7 +234,7 @@ ClickHouse的架构描述可以在此处查看:https://clickhouse.tech/docs/en 编写测试用例:https://clickhouse.tech/docs/en/development/tests/ -任务列表:https://github.com/ClickHouse/ClickHouse/blob/master/tests/instructions/easy\_tasks\_sorted\_en.md +任务列表:https://github.com/ClickHouse/ClickHouse/contribute # 测试数据 {#ce-shi-shu-ju} diff --git a/docs/zh/index.md b/docs/zh/index.md index 522affa6250..926c4ce2fdf 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -4,7 +4,7 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在传统的行式数据库系统中,数据按如下顺序存储: -| 行 | 小心点 | JavaEnable | 标题 | GoodEvent | 活动时间 | +| row | watchID | JavaEnable | title | GoodEvent | EventTime | |-----|-------------|------------|------------|-----------|---------------------| | \#0 | 89354350662 | 1 | 投资者关系 | 1 | 2016-05-18 05:19:20 | | \#1 | 90329509958 | 0 | 联系我们 | 1 | 2016-05-18 08:10:20 | @@ -18,23 +18,23 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在列式数据库系统中,数据按如下的顺序存储: -| 行: | \#0 | \#1 | \#2 | \#N | +| row: | \#0 | \#1 | \#2 | \#N | |-------------|---------------------|---------------------|---------------------|-----| -| 小心点: | 89354350662 | 90329509958 | 89953706054 | … | +| watchID: | 89354350662 | 90329509958 | 89953706054 | … | | JavaEnable: | 1 | 0 | 1 | … | -| 标题: | 投资者关系 | 联系我们 | 任务 | … | +| title: | 投资者关系 | 联系我们 | 任务 | … | | GoodEvent: | 1 | 1 | 1 | … | -| 活动时间: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | -该示例中只展示了数据在列式数据库中数据的排列顺序。 +该示例中只展示了数据在列式数据库中数据的排列方式。 对于存储而言,列式数据库总是将同一列的数据存储在一起,不同列的数据也总是分开存储。 常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。 {: .灰色 } -不同的存储方式适合不同的场景,这里的查询场景包括: 进行了哪些查询,多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。 +不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。 -系统负载越高,根据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统同样适用于明显不同的场景。如果系统适用于广泛的场景,在负载高的情况下,所有的场景可以会被公平但低效处理,或者高效处理一小部分场景。 +系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有明显不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率? 
## OLAP场景的关键特征 {#olapchang-jing-de-guan-jian-te-zheng} @@ -52,7 +52,7 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) - 每一个查询除了一个大表外都很小 - 查询结果明显小于源数据,换句话说,数据被过滤或聚合后能够被盛放在单台服务器的内存中 -很容易可以看出,OLAP场景与其他流行场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询是没有意义的,例如,使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。 +很容易可以看出,OLAP场景与其他通常业务场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询场景,并不是非常完美的适用方案。例如,使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。 ## 列式数据库更适合OLAP场景的原因 {#lie-shi-shu-ju-ku-geng-gua-he-olapchang-jing-de-yuan-yin} diff --git a/docs/zh/operations/utilities/clickhouse-copier.md b/docs/zh/operations/utilities/clickhouse-copier.md index a5364bcaa71..3dc29fe16fa 100644 --- a/docs/zh/operations/utilities/clickhouse-copier.md +++ b/docs/zh/operations/utilities/clickhouse-copier.md @@ -24,7 +24,7 @@ 该实用程序应手动运行: ``` bash -clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir +clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir ``` 参数: diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index c8f942013ea..830581beba7 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -313,7 +313,7 @@ ORDER BY level ASC └───────┴───┘ ``` -## 保留 {#retention} +## Retention {#retention} 该函数将一组条件作为参数,类型为1到32个参数 `UInt8` 表示事件是否满足特定条件。 任何条件都可以指定为参数(如 [WHERE](../../sql-reference/statements/select/where.md#select-where)). diff --git a/docs/zh/sql-reference/data-types/lowcardinality.md b/docs/zh/sql-reference/data-types/lowcardinality.md new file mode 100644 index 00000000000..b8985691f0f --- /dev/null +++ b/docs/zh/sql-reference/data-types/lowcardinality.md @@ -0,0 +1,59 @@ +--- +toc_priority: 51 +toc_title: 低基数类型 +--- + +# 低基数类型 {#lowcardinality-data-type} + +把其它数据类型转变为字典编码类型。 + +## 语法 {#lowcardinality-syntax} + +```sql +LowCardinality(data_type) +``` + +**参数** + +- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md),包括数字类型,但是[Decimal](decimal.md)除外。对一些数据类型来说,`LowCardinality` 并不高效,详查[allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)设置描述。 + +## 描述 {#lowcardinality-dscr} + +`LowCardinality` 是一种改变数据存储和数据处理方法的概念。 ClickHouse会把 `LowCardinality` 所在的列进行[dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder)。对很多应用来说,处理字典编码的数据可以显著的增加[SELECT](../statements/select/index.md)查询速度。 + +使用 `LowCarditality` 数据类型的效率依赖于数据的多样性。如果一个字典包含少于10000个不同的值,那么ClickHouse可以进行更高效的数据存储和处理。反之如果字典多于10000,效率会表现的更差。 + +当使用字符类型的时候,可以考虑使用 `LowCardinality` 代替[Enum](enum.md)。 `LowCardinality` 通常更加灵活和高效。 + +## 例子 + +创建一个 `LowCardinality` 类型的列: + +```sql +CREATE TABLE lc_t +( + `id` UInt16, + `strings` LowCardinality(String) +) +ENGINE = MergeTree() +ORDER BY id +``` + +## 相关的设置和函数 + +设置: + +- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) +- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) +- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) +- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) + +函数: + +- 
[toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality) + +## 参考 + +- [高效低基数类型](https://www.altinity.com/blog/2019/3/27/low-cardinality). +- [使用低基数类型减少ClickHouse的存储成本 – 来自Instana工程师的分享](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). +- [字符优化 (俄语视频分享)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [英语分享](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). \ No newline at end of file diff --git a/docs/zh/sql-reference/functions/arithmetic-functions.md b/docs/zh/sql-reference/functions/arithmetic-functions.md index 1c2ed3fccfc..b7cfa87ef94 100644 --- a/docs/zh/sql-reference/functions/arithmetic-functions.md +++ b/docs/zh/sql-reference/functions/arithmetic-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 35 +toc_title: 算术函数 +--- + # 算术函数 {#suan-zhu-han-shu} 对于所有算术函数,结果类型为结果适合的最小数字类型(如果存在这样的类型)。最小数字类型是根据数字的位数,是否有符号以及是否是浮点类型而同时进行的。如果没有足够的位,则采用最高位类型。 diff --git a/docs/zh/sql-reference/functions/comparison-functions.md b/docs/zh/sql-reference/functions/comparison-functions.md index a3da3a3047a..8b5d72e64c2 100644 --- a/docs/zh/sql-reference/functions/comparison-functions.md +++ b/docs/zh/sql-reference/functions/comparison-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 36 +toc_title: 比较函数 +--- + # 比较函数 {#bi-jiao-han-shu} 比较函数始终返回0或1(UInt8)。 @@ -15,18 +20,16 @@ 字符串按字节进行比较。较短的字符串小于以其开头并且至少包含一个字符的所有字符串。 -注意。直到1.1.54134版本,有符号和无符号数字的比较方式与C++相同。换句话说,在SELECT 9223372036854775807 &gt; -1 等情况下,您可能会得到错误的结果。 此行为在版本1.1.54134中已更改,现在在数学上是正确的。 +## 等于,a=b和a==b 运算符 {#equals-a-b-and-a-b-operator} -## 等于,a=b和a==b运算符 {#equals-a-b-and-a-b-operator} +## 不等于,a!=b和a<>b 运算符 {#notequals-a-operator-b-and-a-b} -## notEquals,a! 
运算符=b和a `<>` b {#notequals-a-operator-b-and-a-b} +## 少, < 运算符 {#less-operator} -## 少, `< operator` {#less-operator} +## 大于, > 运算符 {#greater-operator} -## 更大, `> operator` {#greater-operator} +## 小于等于, <= 运算符 {#lessorequals-operator} -## 出租等级, `<= operator` {#lessorequals-operator} - -## 伟大的等级, `>= operator` {#greaterorequals-operator} +## 大于等于, >= 运算符 {#greaterorequals-operator} [来源文章](https://clickhouse.tech/docs/en/query_language/functions/comparison_functions/) diff --git a/docs/zh/sql-reference/functions/index.md b/docs/zh/sql-reference/functions/index.md index 8d178592e92..52954c95cff 100644 --- a/docs/zh/sql-reference/functions/index.md +++ b/docs/zh/sql-reference/functions/index.md @@ -1,3 +1,9 @@ +--- +toc_folder_title: 函数 +toc_priority: 32 +toc_title: 简介 +--- + # 函数 {#han-shu} ClickHouse中至少存在两种类型的函数 - 常规函数(它们称之为«函数»)和聚合函数。 常规函数的工作就像分别为每一行执行一次函数计算一样(对于每一行,函数的结果不依赖于其他行)。 聚合函数则从各行累积一组值(即函数的结果以来整个结果集)。 diff --git a/docs/zh/sql-reference/functions/logical-functions.md b/docs/zh/sql-reference/functions/logical-functions.md index b14f1bb5d37..cc168dbb1ed 100644 --- a/docs/zh/sql-reference/functions/logical-functions.md +++ b/docs/zh/sql-reference/functions/logical-functions.md @@ -1,15 +1,20 @@ +--- +toc_priority: 37 +toc_title: 逻辑函数 +--- + # 逻辑函数 {#luo-ji-han-shu} 逻辑函数可以接受任何数字类型的参数,并返回UInt8类型的0或1。 当向函数传递零时,函数将判定为«false»,否则,任何其他非零的值都将被判定为«true»。 -## 和,和运营商 {#and-and-operator} +## 和,`AND` 运算符 {#and-and-operator} -## 或,或运营商 {#or-or-operator} +## 或,`OR` 运算符 {#or-or-operator} -## 不是,不是运营商 {#not-not-operator} +## 非,`NOT` 运算符 {#not-not-operator} -## 异或 {#xor} +## 异或,`XOR` 运算符 {#xor} [来源文章](https://clickhouse.tech/docs/en/query_language/functions/logical_functions/) diff --git a/docs/zh/sql-reference/functions/type-conversion-functions.md b/docs/zh/sql-reference/functions/type-conversion-functions.md index 011b2951e74..6e3ed9afb78 100644 --- a/docs/zh/sql-reference/functions/type-conversion-functions.md +++ b/docs/zh/sql-reference/functions/type-conversion-functions.md @@ -1,16 +1,230 @@ +--- +toc_priority: 38 +toc_title: 类型转换函数 +--- + # 类型转换函数 {#lei-xing-zhuan-huan-han-shu} -## toUInt8,toUInt16,toUInt32,toUInt64 {#touint8-touint16-touint32-touint64} +## 数值类型转换常见的问题 {#numeric-conversion-issues} -## toInt8,toInt16,toInt32,toInt64 {#toint8-toint16-toint32-toint64} +当你把一个值从一个类型转换为另外一个类型的时候,你需要注意的是这是一个不安全的操作,可能导致数据的丢失。数据丢失一般发生在你将一个大的数据类型转换为小的数据类型的时候,或者你把两个不同的数据类型相互转换的时候。 -## toFloat32,toFloat64 {#tofloat32-tofloat64} +ClickHouse和[C++](https://en.cppreference.com/w/cpp/language/implicit_conversion)有相同的类型转换行为。 -## 今天,今天 {#todate-todatetime} +## toInt(8\|16\|32\|64) {#touint8-touint16-touint32-touint64} -## toUInt8OrZero,toUInt16OrZero,toUInt32OrZero,toUInt64OrZero,toInt8OrZero,toInt16OrZero,toInt32OrZero,toInt64OrZero,toFloat32OrZero,toFloat64OrZero,toDateOrZero,toDateTimeOrZero {#touint8orzero-touint16orzero-touint32orzero-touint64orzero-toint8orzero-toint16orzero-toint32orzero-toint64orzero-tofloat32orzero-tofloat64orzero-todateorzero-todatetimeorzero} +转换一个输入值为[Int](../../sql-reference/data-types/int-uint.md)类型。这个函数包括: + +- `toInt8(expr)` — 结果为`Int8`数据类型。 +- `toInt16(expr)` — 结果为`Int16`数据类型。 +- `toInt32(expr)` — 结果为`Int32`数据类型。 +- `toInt64(expr)` — 结果为`Int64`数据类型。 + +**参数** + +- `expr` — [表达式](../syntax.md#syntax-expressions)返回一个数字或者代表数值类型的字符串。不支持二进制、八进制、十六进制的数字形式,有效数字之前的0也会被忽略。 + +**返回值** + +整形在`Int8`, `Int16`, `Int32`,或者 `Int64` 的数据类型。 + +函数使用[rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero)原则,这意味着会截断丢弃小数部分的数值。 + +[NaN and 
Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf)转换是不确定的。具体使用的时候,请参考[数值类型转换常见的问题](#numeric-conversion-issues)。 + +**例子** + +``` sql +SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8) +``` + +``` text +┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐ +│ -9223372036854775808 │ 32 │ 16 │ 8 │ +└──────────────────────┴─────────────┴───────────────┴─────────────┘ +``` + +## toInt(8\|16\|32\|64)OrZero {#toint8163264orzero} + +这个函数需要一个字符类型的入参,然后尝试把它转为`Int (8 | 16 | 32 | 64)`,如果转换失败直接返回0。 + +**例子** + +``` sql +select toInt64OrZero('123123'), toInt8OrZero('123qwe123') +``` + +``` text +┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐ +│ 123123 │ 0 │ +└─────────────────────────┴───────────────────────────┘ +``` +## toInt(8\|16\|32\|64)OrNull {#toint8163264ornull} + +这个函数需要一个字符类型的入参,然后尝试把它转为`Int (8 | 16 | 32 | 64)`,如果转换失败直接返回`NULL`。 + +**例子** + +``` sql +select toInt64OrNull('123123'), toInt8OrNull('123qwe123') +``` + +``` text +┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐ +│ 123123 │ ᴺᵁᴸᴸ │ +└─────────────────────────┴───────────────────────────┘ +``` + +## toUInt(8\|16\|32\|64) {#touint8163264} + +转换一个输入值到[UInt](../../sql-reference/data-types/int-uint.md)类型。 这个函数包括: + +- `toUInt8(expr)` — 结果为`UInt8`数据类型。 +- `toUInt16(expr)` — 结果为`UInt16`数据类型。 +- `toUInt32(expr)` — 结果为`UInt32`数据类型。 +- `toUInt64(expr)` — 结果为`UInt64`数据类型。 + +**参数** + +- `expr` — [表达式](../syntax.md#syntax-expressions)返回一个数字或者代表数值类型的字符串。不支持二进制、八进制、十六进制的数字形式,有效数字之前的0也会被忽略。 + +**返回值** + +整形在`UInt8`, `UInt16`, `UInt32`,或者 `UInt64` 的数据类型。 + +函数使用[rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero)原则,这意味着会截断丢弃小数部分的数值。 + +对于负数和[NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf)来说转换的结果是不确定的。如果你传入一个负数,比如:`'-32'`,ClickHouse会抛出异常。具体使用的时候,请参考[数值类型转换常见的问题](#numeric-conversion-issues)。 + +**例子** + +``` sql +SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) +``` + +``` text +┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐ +│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │ +└─────────────────────┴───────────────┴────────────────┴──────────────┘ +``` + +## toUInt(8\|16\|32\|64)OrZero {#touint8163264orzero} + +## toUInt(8\|16\|32\|64)OrNull {#touint8163264ornull} + +## toFloat(32\|64) {#tofloat3264} + +## toFloat(32\|64)OrZero {#tofloat3264orzero} + +## toFloat(32\|64)OrNull {#tofloat3264ornull} + +## toDate {#todate} + +## toDateOrZero {#todateorzero} + +## toDateOrNull {#todateornull} + +## toDateTime {#todatetime} + +## toDateTimeOrZero {#todatetimeorzero} + +## toDateTimeOrNull {#todatetimeornull} + +## toDecimal(32\|64\|128) {#todecimal3264128} + +转换 `value` 到[Decimal](../../sql-reference/data-types/decimal.md)类型的值,其中精度为`S`。`value`可以是一个数字或者一个字符串。`S` 指定小数位的精度。 + +- `toDecimal32(value, S)` +- `toDecimal64(value, S)` +- `toDecimal128(value, S)` + +## toDecimal(32\|64\|128)OrNull {#todecimal3264128ornull} + +转换一个输入的字符到[Nullable(Decimal(P,S))](../../sql-reference/data-types/decimal.md)类型的数据。这个函数包括: + +- `toDecimal32OrNull(expr, S)` — 结果为`Nullable(Decimal32(S))`数据类型。 +- `toDecimal64OrNull(expr, S)` — 结果为`Nullable(Decimal64(S))`数据类型。 +- `toDecimal128OrNull(expr, S)` — 结果为`Nullable(Decimal128(S))`数据类型。 + +如果在解析输入值发生错误的时候你希望得到一个`NULL`值而不是抛出异常,你可以使用该函数。 + +**参数** + +- `expr` — [表达式](../syntax.md#syntax-expressions)返回一个[String](../../sql-reference/data-types/string.md)类型的数据。 ClickHouse倾向于文本类型的表示带小数类型的数值,比如`'1.111'`。 +- `S` — 小数位的精度。 + +**返回值** + +`Nullable(Decimal(P,S))`类型的数据,包括: + +- 如果有的话,小数位`S`。 
+- 如果解析错误或者输入的数字的小数位多于`S`,那结果为`NULL`。 + +**例子** + +``` sql +SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val) +``` + +``` text +┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐ +│ -1.11100 │ Nullable(Decimal(9, 5)) │ +└──────────┴────────────────────────────────────────────────────┘ +``` + +``` sql +SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) +``` + +``` text +┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐ +│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │ +└──────┴────────────────────────────────────────────────────┘ +``` + +## toDecimal(32\|64\|128)OrZero {#todecimal3264128orzero} + +转换输入值为[Decimal(P,S)](../../sql-reference/data-types/decimal.md)类型数据。这个函数包括: + +- `toDecimal32OrZero( expr, S)` — 结果为`Decimal32(S)` 数据类型。 +- `toDecimal64OrZero( expr, S)` — 结果为`Decimal64(S)` 数据类型。 +- `toDecimal128OrZero( expr, S)` — 结果为`Decimal128(S)` 数据类型。 + +当解析错误的时候,你不需要抛出异常而希望得到`0`值,你可以使用该函数。 + +**参数** + +- `expr` — [表达式](../syntax.md#syntax-expressions)返回一个[String](../../sql-reference/data-types/string.md)类型的数据。 ClickHouse倾向于文本类型的表示带小数类型的数值,比如`'1.111'`。 +- `S` — 小数位的精度。 + +**返回值** + +A value in the `Nullable(Decimal(P,S))` data type. The value contains: + +- 如果有的话,小数位`S`。 +- 如果解析错误或者输入的数字的小数位多于`S`,那结果为小数位精度为`S`的`0`。 +**例子** + +``` sql +SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val) +``` + +``` text +┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐ +│ -1.11100 │ Decimal(9, 5) │ +└──────────┴────────────────────────────────────────────────────┘ +``` + +``` sql +SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val) +``` + +``` text +┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐ +│ 0.00 │ Decimal(9, 2) │ +└──────┴────────────────────────────────────────────────────┘ +``` -## toUInt8OrNull,toUInt16OrNull,toUInt32OrNull,toUInt64OrNull,toInt8OrNull,toInt16OrNull,toInt32OrNull,toInt64OrNull,toFloat32OrNull,toFloat64OrNull,toDateOrNull,toDateTimeOrNull {#touint8ornull-touint16ornull-touint32ornull-touint64ornull-toint8ornull-toint16ornull-toint32ornull-toint64ornull-tofloat32ornull-tofloat64ornull-todateornull-todatetimeornull} ## toString {#tostring} @@ -47,10 +261,6 @@ SELECT 另请参阅`toUnixTimestamp`函数。 -## toDecimal32(value,S),toDecimal64(value,S),toDecimal128(value,S) {#todecimal32value-s-todecimal64value-s-todecimal128value-s} - -将`value`转换为精度为`S`的[十进制](../../sql-reference/functions/type-conversion-functions.md)。`value`可以是数字或字符串。`S`参数为指定的小数位数。 - ## toFixedString(s,N) {#tofixedstrings-n} 将String类型的参数转换为FixedString(N)类型的值(具有固定长度N的字符串)。N必须是一个常量。 @@ -78,17 +288,19 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut │ foo\0bar\0 │ foo │ └────────────┴───────┘ -## reinterpretAsUInt8,reinterpretAsUInt16,reinterpretAsUInt32,reinterpretAsUInt64 {#reinterpretasuint8-reinterpretasuint16-reinterpretasuint32-reinterpretasuint64} +## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} -## reinterpretAsInt8,reinterpretAsInt16,reinterpretAsInt32,reinterpretAsInt64 {#reinterpretasint8-reinterpretasint16-reinterpretasint32-reinterpretasint64} +## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} -## reinterpretAsFloat32,reinterpretAsFloat64 {#reinterpretasfloat32-reinterpretasfloat64} +## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264} -## 重新解释日期,重新解释日期时间 {#reinterpretasdate-reinterpretasdatetime} +## reinterpretAsDate {#reinterpretasdate} + +## reinterpretAsDateTime {#reinterpretasdatetime} 这些函数接受一个字符串,并将放在字符串开头的字节解释为主机顺序中的数字(little 
endian)。如果字符串不够长,则函数就像使用必要数量的空字节填充字符串一样。如果字符串比需要的长,则忽略额外的字节。Date被解释为Unix时间戳的天数,DateTime被解释为Unix时间戳。 -## 重新解释字符串 {#reinterpretasstring} +## reinterpretAsString {#reinterpretasstring} 此函数接受数字、Date或DateTime,并返回一个字符串,其中包含表示主机顺序(小端)的相应值的字节。从末尾删除空字节。例如,UInt32类型值255是一个字节长的字符串。 @@ -96,7 +308,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut 此函数接受数字、Date或DateTime,并返回包含表示主机顺序(小端)的相应值的字节的FixedString。从末尾删除空字节。例如,UInt32类型值255是一个长度为一个字节的FixedString。 -## 演员(x,t) {#type_conversion_function-cast} +## CAST(x, T) {#type_conversion_function-cast} 将’x’转换为’t’数据类型。还支持语法CAST(x AS t) @@ -133,10 +345,32 @@ SELECT │ Nullable(UInt16) │ └─────────────────────────────────────────┘ -## 每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每天每 {#function-tointerval} +## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval} -将数字类型参数转换为Interval类型(时间区间)。 -Interval类型实际上是非常有用的,您可以使用此类型的数据直接与Date或DateTime执行算术运算。同时,ClickHouse为Interval类型数据的声明提供了更方便的语法。例如: +把一个数值类型的值转换为[Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的数据。 + +**语法** + +``` sql +toIntervalSecond(number) +toIntervalMinute(number) +toIntervalHour(number) +toIntervalDay(number) +toIntervalWeek(number) +toIntervalMonth(number) +toIntervalQuarter(number) +toIntervalYear(number) +``` + +**参数** + +- `number` — 正整数,持续的时间。 + +**返回值** + +- 时间的`Interval`值。 + +**例子** ``` sql WITH @@ -148,22 +382,257 @@ SELECT date + interval_to_week ``` - ┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ - │ 2019-01-08 │ 2019-01-08 │ - └───────────────────────────┴──────────────────────────────┘ +``` text +┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ +│ 2019-01-08 │ 2019-01-08 │ +└───────────────────────────┴──────────────────────────────┘ +``` -## parsedatetimebestefort {#type_conversion_functions-parsedatetimebesteffort} +## parseDateTimeBestEffort {#parsedatetimebesteffort} -将数字类型参数解析为Date或DateTime类型。 -与toDate和toDateTime不同,parseDateTimeBestEffort可以进行更复杂的日期格式。 -有关详细信息,请参阅链接:[复杂日期格式](https://xkcd.com/1179/)。 +把[String](../../sql-reference/data-types/string.md)类型的时间日期转换为[DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime)数据类型。 -## parsedatetimebestefortornull {#parsedatetimebesteffortornull} +该函数可以解析[ISO 8601](https://en.wikipedia.org/wiki/ISO_8601),[RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55)或者ClickHouse的一些别的时间日期格式。 -与[parsedatetimebestefort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回null。 +**语法** -## parsedatetimebestefortorzero {#parsedatetimebesteffortorzero} +``` sql +parseDateTimeBestEffort(time_string [, time_zone]); +``` -与[parsedatetimebestefort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回零Date或零DateTime。 +**参数** + +- `time_string` — 字符类型的时间和日期。 +- `time_zone` — 字符类型的时区。 + +**非标准格式的支持** + +- 9位或者10位的数字时间,[unix timestamp](https://en.wikipedia.org/wiki/Unix_time). 
+- 时间和日期组成的字符串: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`等。 +- 只有日期的字符串: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` 等。 +- 只有天和时间: `DD`, `DD hh`, `DD hh:mm`。这种情况下 `YYYY-MM` 默认为 `2000-01`。 +- 包含时间日期以及时区信息: `YYYY-MM-DD hh:mm:ss ±h:mm`等。例如: `2020-12-12 17:36:00 -5:00`。 + +对于所有的格式来说,这个函数通过全称或者第一个三个字符的月份名称来解析月份,比如:`24/DEC/18`, `24-Dec-18`, `01-September-2018`。 + +**返回值** + +- `DateTime`类型数据。 + +**例子** + +查询: + +``` sql +SELECT parseDateTimeBestEffort('12/12/2020 12:12:57') +AS parseDateTimeBestEffort; +``` + +结果: + +``` text +┌─parseDateTimeBestEffort─┐ +│ 2020-12-12 12:12:57 │ +└─────────────────────────┘ +``` + +查询: + +``` sql +SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') +AS parseDateTimeBestEffort +``` + +结果: + +``` text +┌─parseDateTimeBestEffort─┐ +│ 2018-08-18 10:22:16 │ +└─────────────────────────┘ +``` + +查询: + +``` sql +SELECT parseDateTimeBestEffort('1284101485') +AS parseDateTimeBestEffort +``` + +结果: + +``` text +┌─parseDateTimeBestEffort─┐ +│ 2015-07-07 12:04:41 │ +└─────────────────────────┘ +``` + +查询: + +``` sql +SELECT parseDateTimeBestEffort('2018-12-12 10:12:12') +AS parseDateTimeBestEffort +``` + +结果: + +``` text +┌─parseDateTimeBestEffort─┐ +│ 2018-12-12 10:12:12 │ +└─────────────────────────┘ +``` + +查询: + +``` sql +SELECT parseDateTimeBestEffort('10 20:19') +``` + +结果: + +``` text +┌─parseDateTimeBestEffort('10 20:19')─┐ +│ 2000-01-10 20:19:00 │ +└─────────────────────────────────────┘ +``` + +**除此之外** + +- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) +- [RFC 1123](https://tools.ietf.org/html/rfc1123) +- [toDate](#todate) +- [toDateTime](#todatetime) + +## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull} + +这个函数和[parseDateTimeBestEffort](#parsedatetimebesteffort)基本一致,除了无法解析返回结果为`NULL`。 + +## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero} + +这个函数和[parseDateTimeBestEffort](#parsedatetimebesteffort)基本一致,除了无法解析返回结果为`0`。 + +## toLowCardinality {#tolowcardinality} + +把输入值转换为[LowCardianlity](../data-types/lowcardinality.md)的相同类型的数据。 + +如果要把`LowCardinality`类型的数据转换为其他类型,使用[CAST](#type_conversion_function-cast)函数。比如:`CAST(x as String)`。 + +**语法** + +```sql +toLowCardinality(expr) +``` + +**参数** + +- `expr` — [表达式](../syntax.md#syntax-expressions)为[支持的数据类型](../data-types/index.md#data_types)的一种。 + + +**返回值** + +- `expr`的结果。 + +类型: `LowCardinality(expr_result_type)` + +**例子** + +查询: + +```sql +SELECT toLowCardinality('1') +``` + +结果: + +```text +┌─toLowCardinality('1')─┐ +│ 1 │ +└───────────────────────┘ +``` + + +## toUnixTimestamp64Milli +## toUnixTimestamp64Micro +## toUnixTimestamp64Nano + +把一个`DateTime64`类型的数据转换为`Int64`类型的数据,结果包含固定亚秒的精度。输入的值是变大还是变低依赖于输入的精度。需要注意的是输出的值是一个UTC的时间戳, 不是同一个时区的`DateTime64`值。 + +**语法** + +``` sql +toUnixTimestamp64Milli(value) +``` + +**参数** + +- `value` — 任何精度的DateTime64类型的数据。 + +**返回值** + +- `value` `Int64`类型数据。 + +**例子** + +查询: + +``` sql +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +SELECT toUnixTimestamp64Milli(dt64) +``` + +结果: + +``` text +┌─toUnixTimestamp64Milli(dt64)─┐ +│ 1568650812345 │ +└──────────────────────────────┘ +``` + +``` sql +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +SELECT toUnixTimestamp64Nano(dt64) +``` + +结果: + +``` text +┌─toUnixTimestamp64Nano(dt64)─┐ +│ 1568650812345678000 │ +└─────────────────────────────┘ +``` + +## fromUnixTimestamp64Milli +## fromUnixTimestamp64Micro +## fromUnixTimestamp64Nano + +把`Int64`类型的数据转换为`DateTime64`类型的数据,结果包含固定的亚秒精度和可选的时区。 
输入的值是变大还是变低依赖于输入的精度。需要注意的是输入的值是一个UTC的时间戳, 不是一个包含时区的时间戳。 + + +**语法** + +``` sql +fromUnixTimestamp64Milli(value [, ti]) +``` + +**参数** + +- `value` — `Int64`类型的数据,可以是任意精度。 +- `timezone` — `String`类型的时区 + +**返回值** + +- `value` DateTime64`类型的数据。 + +**例子** + +``` sql +WITH CAST(1234567891011, 'Int64') AS i64 +SELECT fromUnixTimestamp64Milli(i64, 'UTC') +``` + +``` text +┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ +│ 2009-02-13 23:31:31.011 │ +└──────────────────────────────────────┘ +``` [来源文章](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index baf8270d1bf..b36a2ff8194 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -207,7 +207,7 @@ if (TARGET clickhouse-server AND TARGET copy-headers) endif () if (ENABLE_TESTS AND USE_GTEST) - set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor) - add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS}) + set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_libcommon unit_tests_dbms) + add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) add_dependencies(clickhouse-bundle clickhouse-tests) endif() diff --git a/programs/benchmark/CMakeLists.txt b/programs/benchmark/CMakeLists.txt index be999aafe80..3fa8deb6bd9 100644 --- a/programs/benchmark/CMakeLists.txt +++ b/programs/benchmark/CMakeLists.txt @@ -1,5 +1,12 @@ -set(CLICKHOUSE_BENCHMARK_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Benchmark.cpp) -set(CLICKHOUSE_BENCHMARK_LINK PRIVATE dbms clickhouse_aggregate_functions clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) +set (CLICKHOUSE_BENCHMARK_SOURCES Benchmark.cpp) + +set (CLICKHOUSE_BENCHMARK_LINK + PRIVATE + boost::program_options + clickhouse_aggregate_functions + clickhouse_common_config + dbms +) clickhouse_program_add(benchmark) diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index e273123afe0..6ded6a94f3a 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -1,10 +1,19 @@ -set(CLICKHOUSE_CLIENT_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/Client.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ConnectionParameters.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Suggest.cpp +set (CLICKHOUSE_CLIENT_SOURCES + Client.cpp + ConnectionParameters.cpp + Suggest.cpp ) -set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${Boost_PROGRAM_OPTIONS_LIBRARY}) +set (CLICKHOUSE_CLIENT_LINK + PRIVATE + boost::program_options + clickhouse_aggregate_functions + clickhouse_common_config + clickhouse_common_io + clickhouse_functions + clickhouse_parsers + string_utils +) # Always use internal readpassphrase add_subdirectory(readpassphrase) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1c2e0925c2a..917acdc2a83 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -75,6 +75,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) # include @@ -357,6 +358,78 @@ private: return false; } +#if USE_REPLXX + static void highlight(const String & query, std::vector & colors) + { + using namespace replxx; + + static const std::unordered_map token_to_color = + { + { TokenType::Whitespace, Replxx::Color::DEFAULT }, + { TokenType::Comment, Replxx::Color::GRAY }, + { TokenType::BareWord, 
Replxx::Color::DEFAULT }, + { TokenType::Number, Replxx::Color::GREEN }, + { TokenType::StringLiteral, Replxx::Color::CYAN }, + { TokenType::QuotedIdentifier, Replxx::Color::MAGENTA }, + { TokenType::OpeningRoundBracket, Replxx::Color::BROWN }, + { TokenType::ClosingRoundBracket, Replxx::Color::BROWN }, + { TokenType::OpeningSquareBracket, Replxx::Color::BROWN }, + { TokenType::ClosingSquareBracket, Replxx::Color::BROWN }, + { TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE }, + { TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE }, + + { TokenType::Comma, Replxx::Color::INTENSE }, + { TokenType::Semicolon, Replxx::Color::INTENSE }, + { TokenType::Dot, Replxx::Color::INTENSE }, + { TokenType::Asterisk, Replxx::Color::INTENSE }, + { TokenType::Plus, Replxx::Color::INTENSE }, + { TokenType::Minus, Replxx::Color::INTENSE }, + { TokenType::Slash, Replxx::Color::INTENSE }, + { TokenType::Percent, Replxx::Color::INTENSE }, + { TokenType::Arrow, Replxx::Color::INTENSE }, + { TokenType::QuestionMark, Replxx::Color::INTENSE }, + { TokenType::Colon, Replxx::Color::INTENSE }, + { TokenType::Equals, Replxx::Color::INTENSE }, + { TokenType::NotEquals, Replxx::Color::INTENSE }, + { TokenType::Less, Replxx::Color::INTENSE }, + { TokenType::Greater, Replxx::Color::INTENSE }, + { TokenType::LessOrEquals, Replxx::Color::INTENSE }, + { TokenType::GreaterOrEquals, Replxx::Color::INTENSE }, + { TokenType::Concatenation, Replxx::Color::INTENSE }, + { TokenType::At, Replxx::Color::INTENSE }, + + { TokenType::EndOfStream, Replxx::Color::DEFAULT }, + + { TokenType::Error, Replxx::Color::RED }, + { TokenType::ErrorMultilineCommentIsNotClosed, Replxx::Color::RED }, + { TokenType::ErrorSingleQuoteIsNotClosed, Replxx::Color::RED }, + { TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED }, + { TokenType::ErrorSinglePipeMark, Replxx::Color::RED }, + { TokenType::ErrorWrongNumber, Replxx::Color::RED }, + { TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED } + }; + + const Replxx::Color unknown_token_color = Replxx::Color::RED; + + Lexer lexer(query.data(), query.data() + query.size()); + size_t pos = 0; + + for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken()) + { + size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(token.begin), token.size()); + for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) + { + if (token_to_color.find(token.type) != token_to_color.end()) + colors[pos + code_point_index] = token_to_color.at(token.type); + else + colors[pos + code_point_index] = unknown_token_color; + } + + pos += utf8_len; + } + } +#endif + int mainImpl() { UseSSL use_ssl; @@ -502,7 +575,18 @@ private: LineReader::Patterns query_delimiters = {";", "\\G"}; #if USE_REPLXX - ReplxxLineReader lr(Suggest::instance(), history_file, config().has("multiline"), query_extenders, query_delimiters); + replxx::Replxx::highlighter_callback_t highlight_callback{}; + if (config().getBool("highlight")) + highlight_callback = highlight; + + ReplxxLineReader lr( + Suggest::instance(), + history_file, + config().has("multiline"), + query_extenders, + query_delimiters, + highlight_callback); + #elif defined(USE_READLINE) && USE_READLINE ReadlineLineReader lr(Suggest::instance(), history_file, config().has("multiline"), query_extenders, query_delimiters); #else @@ -1766,6 +1850,7 @@ public: ("echo", "in batch mode, print query before execution") ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes 
per second.") ("compression", po::value(), "enable or disable compression") + ("highlight", po::value()->default_value(true), "enable or disable basic syntax highlight in interactive command line") ("log-level", po::value(), "client log level") ("server_logs_file", po::value(), "put server logs into specified file") ; @@ -1912,6 +1997,8 @@ public: config().setBool("disable_suggestion", true); if (options.count("suggestion_limit")) config().setInt("suggestion_limit", options["suggestion_limit"].as()); + if (options.count("highlight")) + config().setBool("highlight", options["highlight"].as()); argsToConfig(common_arguments, config(), 100); diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index 8fffbec4fab..4ac5e735fd5 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -114,6 +114,8 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo << " UNION ALL " "SELECT DISTINCT name FROM system.tables LIMIT " << limit_str << " UNION ALL " + "SELECT DISTINCT name FROM system.dictionaries LIMIT " << limit_str + << " UNION ALL " "SELECT DISTINCT name FROM system.columns LIMIT " << limit_str; } diff --git a/programs/compressor/CMakeLists.txt b/programs/compressor/CMakeLists.txt index c009bb55f76..ff642a32fd4 100644 --- a/programs/compressor/CMakeLists.txt +++ b/programs/compressor/CMakeLists.txt @@ -1,7 +1,12 @@ # Also in utils -set(CLICKHOUSE_COMPRESSOR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Compressor.cpp) -set(CLICKHOUSE_COMPRESSOR_LINK PRIVATE dbms clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_COMPRESSOR_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_COMPRESSOR_SOURCES Compressor.cpp) + +set (CLICKHOUSE_COMPRESSOR_LINK + PRIVATE + boost::program_options + clickhouse_parsers + dbms +) clickhouse_program_add(compressor) diff --git a/programs/extract-from-config/CMakeLists.txt b/programs/extract-from-config/CMakeLists.txt index b82cbb966ae..ff2d7937117 100644 --- a/programs/extract-from-config/CMakeLists.txt +++ b/programs/extract-from-config/CMakeLists.txt @@ -1,5 +1,11 @@ -set(CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ExtractFromConfig.cpp) -set(CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK PRIVATE clickhouse_common_config clickhouse_common_io clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES ExtractFromConfig.cpp) + +set (CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK + PRIVATE + boost::program_options + clickhouse_common_config + clickhouse_common_io + clickhouse_common_zookeeper +) clickhouse_program_add(extract-from-config) diff --git a/programs/format/CMakeLists.txt b/programs/format/CMakeLists.txt index aac72d641e6..ab06708cd3a 100644 --- a/programs/format/CMakeLists.txt +++ b/programs/format/CMakeLists.txt @@ -1,5 +1,11 @@ -set(CLICKHOUSE_FORMAT_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Format.cpp) -set(CLICKHOUSE_FORMAT_LINK PRIVATE dbms clickhouse_common_io clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_FORMAT_INCLUDE SYSTEM PRIVATE ...) 
+set (CLICKHOUSE_FORMAT_SOURCES Format.cpp) + +set (CLICKHOUSE_FORMAT_LINK + PRIVATE + boost::program_options + clickhouse_common_io + clickhouse_parsers + dbms +) clickhouse_program_add(format) diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index d066fd53277..b61f0ea33b7 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -1,6 +1,17 @@ -set(CLICKHOUSE_LOCAL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/LocalServer.cpp) -set(CLICKHOUSE_LOCAL_LINK PRIVATE clickhouse_storages_system clickhouse_dictionaries clickhouse_common_config clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions clickhouse_parsers clickhouse_table_functions ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_LOCAL_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_LOCAL_SOURCES LocalServer.cpp) + +set (CLICKHOUSE_LOCAL_LINK + PRIVATE + boost::program_options + clickhouse_aggregate_functions + clickhouse_common_config + clickhouse_common_io + clickhouse_dictionaries + clickhouse_functions + clickhouse_parsers + clickhouse_storages_system + clickhouse_table_functions +) clickhouse_program_add(local) diff --git a/programs/obfuscator/CMakeLists.txt b/programs/obfuscator/CMakeLists.txt index 19dba2be95c..d1179b3718c 100644 --- a/programs/obfuscator/CMakeLists.txt +++ b/programs/obfuscator/CMakeLists.txt @@ -1,5 +1,9 @@ -set(CLICKHOUSE_OBFUSCATOR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Obfuscator.cpp) -set(CLICKHOUSE_OBFUSCATOR_LINK PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_OBFUSCATOR_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_OBFUSCATOR_SOURCES Obfuscator.cpp) + +set (CLICKHOUSE_OBFUSCATOR_LINK + PRIVATE + boost::program_options + dbms +) clickhouse_program_add(obfuscator) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index e587e134075..8b58c5664b6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -125,6 +126,7 @@ namespace ErrorCodes extern const int FAILED_TO_GETPWUID; extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; + extern const int UNKNOWN_ELEMENT_IN_CONFIG; } @@ -210,6 +212,52 @@ void Server::defineOptions(Poco::Util::OptionSet & options) BaseDaemon::defineOptions(options); } + +/// Check that there is no user-level settings at the top level in config. +/// This is a common source of mistake (user don't know where to write user-level setting). +void checkForUserSettingsAtTopLevel(const Poco::Util::AbstractConfiguration & config, const std::string & path) +{ + if (config.getBool("skip_check_for_incorrect_settings", false)) + return; + + Settings settings; + for (const auto & setting : settings) + { + std::string name = setting.getName().toString(); + if (config.has(name)) + { + throw Exception(fmt::format("A setting '{}' appeared at top level in config {}." + " But it is user-level setting that should be located in users.xml inside section for specific profile." + " You can add it to if you want to change default value of this setting." 
+ " You can also disable the check - specify 1" + " in the main configuration file.", + name, path), + ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + } +} + +void checkForUsersNotInMainConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_path, + const std::string & users_config_path, + Poco::Logger * log) +{ + if (config.getBool("skip_check_for_incorrect_settings", false)) + return; + + if (config.has("users") || config.has("profiles") || config.has("quotas")) + { + /// We cannot throw exception here, because we have support for obsolete 'conf.d' directory + /// (that does not correspond to config.d or users.d) but substitute configuration to both of them. + + LOG_ERROR(log, "The , and elements should be located in users config file: {} not in main config {}." + " Also note that you should place configuration changes to the appropriate *.d directory like 'users.d'.", + users_config_path, config_path); + } +} + + int Server::main(const std::vector & /*args*/) { Poco::Logger * log = &logger(); @@ -269,6 +317,8 @@ int Server::main(const std::vector & /*args*/) config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); } + checkForUserSettingsAtTopLevel(config(), config_path); + const auto memory_amount = getMemoryAmount(); #if defined(OS_LINUX) @@ -473,13 +523,16 @@ int Server::main(const std::vector & /*args*/) SensitiveDataMasker::setInstance(std::make_unique(config(), "query_masking_rules")); } - auto main_config_reloader = std::make_unique(config_path, + auto main_config_reloader = std::make_unique( + config_path, include_from_path, config().getString("path", ""), std::move(main_config_zk_node_cache), main_config_zk_changed_event, [&](ConfigurationPtr config) { + checkForUserSettingsAtTopLevel(*config, config_path); + // FIXME logging-related things need synchronization -- see the 'Logger * log' saved // in a lot of places. For now, disable updating log configuration without server restart. //setTextLog(global_context->getTextLog()); @@ -508,12 +561,21 @@ int Server::main(const std::vector & /*args*/) if (Poco::File(config_dir + users_config_path).exists()) users_config_path = config_dir + users_config_path; } - auto users_config_reloader = std::make_unique(users_config_path, + + if (users_config_path != config_path) + checkForUsersNotInMainConfig(config(), config_path, users_config_path, log); + + auto users_config_reloader = std::make_unique( + users_config_path, include_from_path, config().getString("path", ""), zkutil::ZooKeeperNodeCache([&] { return global_context->getZooKeeper(); }), std::make_shared(), - [&](ConfigurationPtr config) { global_context->setUsersConfig(config); }, + [&](ConfigurationPtr config) + { + global_context->setUsersConfig(config); + checkForUserSettingsAtTopLevel(*config, users_config_path); + }, /* already_loaded = */ false); /// Reload config in SYSTEM RELOAD CONFIG query. diff --git a/programs/server/config.xml b/programs/server/config.xml index 21605edeb36..ba870d8a8ea 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1,6 +1,9 @@ diff --git a/src/Access/AllowedClientHosts.cpp b/src/Access/AllowedClientHosts.cpp index 82372fd8b14..1cee8a2f782 100644 --- a/src/Access/AllowedClientHosts.cpp +++ b/src/Access/AllowedClientHosts.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -44,66 +45,22 @@ namespace return IPSubnet(toIPv6(subnet.getPrefix()), subnet.getMask()); } - - /// Helper function for isAddressOfHost(). 
- bool isAddressOfHostImpl(const IPAddress & address, const String & host) - { - IPAddress addr_v6 = toIPv6(address); - - /// Resolve by hand, because Poco don't use AI_ALL flag but we need it. - addrinfo * ai_begin = nullptr; - SCOPE_EXIT( - { - if (ai_begin) - freeaddrinfo(ai_begin); - }); - - addrinfo hints; - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_flags |= AI_V4MAPPED | AI_ALL; - - int err = getaddrinfo(host.c_str(), nullptr, &hints, &ai_begin); - if (err) - throw Exception("Cannot getaddrinfo(" + host + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR); - - for (const addrinfo * ai = ai_begin; ai; ai = ai->ai_next) - { - if (ai->ai_addrlen && ai->ai_addr) - { - if (ai->ai_family == AF_INET) - { - const auto & sin = *reinterpret_cast(ai->ai_addr); - if (addr_v6 == toIPv6(IPAddress(&sin.sin_addr, sizeof(sin.sin_addr)))) - { - return true; - } - } - else if (ai->ai_family == AF_INET6) - { - const auto & sin = *reinterpret_cast(ai->ai_addr); - if (addr_v6 == IPAddress(&sin.sin6_addr, sizeof(sin.sin6_addr), sin.sin6_scope_id)) - { - return true; - } - } - } - } - - return false; - } - - auto & getIsAddressOfHostCache() - { - static SimpleCache cache; - return cache; - } - /// Whether a specified address is one of the addresses of a specified host. bool isAddressOfHost(const IPAddress & address, const String & host) { - /// We need to cache DNS requests. - return getIsAddressOfHostCache()(address, host); + IPAddress addr_v6 = toIPv6(address); + + auto host_addresses = DNSResolver::instance().resolveHostAll(host); + + for (const auto & addr : host_addresses) + { + if (addr.family() == IPAddress::Family::IPv4 && addr_v6 == toIPv6(addr)) + return true; + else if (addr.family() == IPAddress::Family::IPv6 && addr_v6 == addr) + return true; + } + + return false; } /// Helper function for isAddressOfLocalhost(). @@ -147,16 +104,10 @@ namespace return boost::range::find(local_addresses, toIPv6(address)) != local_addresses.end(); } - /// Helper function for getHostByAddress(). - String getHostByAddressImpl(const IPAddress & address) + /// Returns the host name by its address. + String getHostByAddress(const IPAddress & address) { - Poco::Net::SocketAddress sock_addr(address, 0); - - /// Resolve by hand, because Poco library doesn't have such functionality. - char host[1024]; - int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD); - if (err) - throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR); + String host = DNSResolver::instance().reverseResolve(address); /// Check that PTR record is resolved back to client address if (!isAddressOfHost(address, host)) @@ -165,19 +116,6 @@ namespace return host; } - auto & getHostByAddressCache() - { - static SimpleCache cache; - return cache; - } - - /// Returns the host name by its address. - String getHostByAddress(const IPAddress & address) - { - /// We need to cache DNS requests. 
- return getHostByAddressCache()(address); - } - void parseLikePatternIfIPSubnet(const String & pattern, IPSubnet & subnet, IPAddress::Family address_family) { @@ -376,10 +314,4 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const return false; } -void AllowedClientHosts::dropDNSCaches() -{ - getIsAddressOfHostCache().drop(); - getHostByAddressCache().drop(); -} - } diff --git a/src/Access/AllowedClientHosts.h b/src/Access/AllowedClientHosts.h index 4f4d54ce1ac..2baafb2e04a 100644 --- a/src/Access/AllowedClientHosts.h +++ b/src/Access/AllowedClientHosts.h @@ -114,8 +114,6 @@ public: friend bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs); friend bool operator !=(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) { return !(lhs == rhs); } - static void dropDNSCaches(); - private: std::vector addresses; std::vector subnets; diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8e4314ec7c5..a7af61c7712 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -12,10 +12,10 @@ namespace DB { namespace ErrorCodes { - extern const int BAD_CAST; extern const int ACCESS_ENTITY_ALREADY_EXISTS; extern const int ACCESS_ENTITY_NOT_FOUND; extern const int ACCESS_STORAGE_READONLY; + extern const int LOGICAL_ERROR; } @@ -403,7 +403,7 @@ void IAccessStorage::throwBadCast(const UUID & id, EntityType type, const String { throw Exception( "ID {" + toString(id) + "}: " + outputEntityTypeAndName(type, name) + " expected to be of type " + toString(required_type), - ErrorCodes::BAD_CAST); + ErrorCodes::LOGICAL_ERROR); } diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.h b/src/AggregateFunctions/AggregateFunctionMLMethod.h index ce4ef98e0cf..a11ca9032a5 100644 --- a/src/AggregateFunctions/AggregateFunctionMLMethod.h +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.h @@ -15,7 +15,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_CAST; } /** @@ -381,7 +380,7 @@ public: auto * column = typeid_cast(&to); if (!column) throw Exception("Cast of column of predictions is incorrect. getReturnTypeToPredict must return same value as it is casted to", - ErrorCodes::BAD_CAST); + ErrorCodes::LOGICAL_ERROR); this->data(place).predict(column->getData(), block, offset, limit, arguments, context); } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 1870eee07b8..0087a41d437 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -150,6 +150,8 @@ public: virtual void addBatchSinglePlaceNotNull( size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0; + virtual void addBatchSinglePlaceFromInterval(size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0; + /** In addition to addBatch, this method collects multiple rows of arguments into array "places" * as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and * -Array combinator. 
It might also be used generally to break data dependency when array @@ -214,6 +216,12 @@ public: static_cast(this)->add(place, columns, i, arena); } + void addBatchSinglePlaceFromInterval(size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override + { + for (size_t i = batch_begin; i < batch_end; ++i) + static_cast(this)->add(place, columns, i, arena); + } + void addBatchArray( size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena) const override diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp index 2a6b9e3b499..27772c143e8 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp @@ -27,8 +27,12 @@ Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const const auto * literal = parameters[i]->as(); if (!literal) { - throw Exception("Parameters to aggregate functions must be literals" + (error_context.empty() ? "" : " (in " + error_context +")"), - ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS); + throw Exception( + ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, + "Parameters to aggregate functions must be literals. " + "Got parameter '{}'{}", + parameters[i]->formatForErrorMessage(), + (error_context.empty() ? "" : " (in " + error_context +")")); } params_row[i] = literal->value; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 88e4315db96..fe223373cf3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -161,12 +161,12 @@ add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Alg if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) add_library (dbms STATIC ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PRIVATE jemalloc) + target_link_libraries (dbms PRIVATE jemalloc libdivide) set (all_modules dbms) else() add_library (dbms SHARED ${dbms_headers} ${dbms_sources}) target_link_libraries (dbms PUBLIC ${all_modules}) - target_link_libraries (clickhouse_interpreters PRIVATE jemalloc) + target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide) list (APPEND all_modules dbms) # force all split libs to be linked set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") @@ -184,6 +184,9 @@ macro (dbms_target_link_libraries) endforeach () endmacro () +dbms_target_include_directories (PUBLIC ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src) +target_include_directories (clickhouse_common_io PUBLIC ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src) + if (USE_EMBEDDED_COMPILER) dbms_target_link_libraries (PRIVATE ${REQUIRED_LLVM_LIBRARIES}) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) @@ -240,8 +243,8 @@ target_link_libraries(clickhouse_common_io ${EXECINFO_LIBRARIES} cpuid PUBLIC - ${Boost_PROGRAM_OPTIONS_LIBRARY} - ${Boost_SYSTEM_LIBRARY} + boost::program_options + boost::system ${CITYHASH_LIBRARIES} ${ZLIB_LIBRARIES} pcg_random @@ -264,18 +267,18 @@ endif() dbms_target_link_libraries ( PRIVATE ${BTRIE_LIBRARIES} - ${Boost_PROGRAM_OPTIONS_LIBRARY} - ${Boost_FILESYSTEM_LIBRARY} - ${LZ4_LIBRARY} - clickhouse_parsers + boost::filesystem + boost::program_options clickhouse_common_config clickhouse_common_zookeeper clickhouse_dictionaries_embedded + clickhouse_parsers + lz4 Poco::JSON string_utils PUBLIC - 
${Boost_SYSTEM_LIBRARY} ${MYSQLXX_LIBRARY} + boost::system clickhouse_common_io ) @@ -284,10 +287,6 @@ dbms_target_include_directories(PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/Core/include) dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) -if (NOT USE_INTERNAL_LZ4_LIBRARY AND LZ4_INCLUDE_DIR) - dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR}) -endif () - if (ZSTD_LIBRARY) dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY}) if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR) @@ -295,10 +294,6 @@ if (ZSTD_LIBRARY) endif () endif() -if (NOT USE_INTERNAL_BOOST_LIBRARY) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) -endif () - if (USE_ICU) dbms_target_link_libraries (PRIVATE ${ICU_LIBRARIES}) dbms_target_include_directories (SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) @@ -329,8 +324,6 @@ if (USE_LDAP) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${OPENLDAP_INCLUDE_DIR}) dbms_target_link_libraries (PRIVATE ${OPENLDAP_LIBRARIES}) endif () - -dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) if (USE_PROTOBUF) @@ -364,9 +357,6 @@ if (USE_CASSANDRA) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) endif() -dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR}) -target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 1568437618d..d4021b45f0e 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,51 @@ namespace ErrorCodes } +static std::string getTypeString(const AggregateFunctionPtr & func) +{ + WriteBufferFromOwnString stream; + stream << "AggregateFunction(" << func->getName(); + const auto & parameters = func->getParameters(); + const auto & argument_types = func->getArgumentTypes(); + + if (!parameters.empty()) + { + stream << '('; + for (size_t i = 0; i < parameters.size(); ++i) + { + if (i) + stream << ", "; + stream << applyVisitor(FieldVisitorToString(), parameters[i]); + } + stream << ')'; + } + + for (const auto & argument_type : argument_types) + stream << ", " << argument_type->getName(); + + stream << ')'; + return stream.str(); +} + + +ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_) + : func(func_), type_string(getTypeString(func)) +{ +} + +ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_, const ConstArenas & arenas_) + : foreign_arenas(arenas_), func(func_), type_string(getTypeString(func)) +{ + +} + +void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_) +{ + func = func_; + type_string = getTypeString(func); +} + + ColumnAggregateFunction::~ColumnAggregateFunction() { if (!func->hasTrivialDestructor() && !src) @@ -336,15 +382,10 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const return create(func); } -String ColumnAggregateFunction::getTypeString() const -{ - return DataTypeAggregateFunction(func, func->getArgumentTypes(), func->getParameters()).getName(); -} - Field 
ColumnAggregateFunction::operator[](size_t n) const { Field field = AggregateFunctionStateData(); - field.get().name = getTypeString(); + field.get().name = type_string; { WriteBufferFromString buffer(field.get().data); func->serialize(data[n], buffer); @@ -355,7 +396,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const void ColumnAggregateFunction::get(size_t n, Field & res) const { res = AggregateFunctionStateData(); - res.get().name = getTypeString(); + res.get().name = type_string; { WriteBufferFromString buffer(res.get().data); func->serialize(data[n], buffer); @@ -425,8 +466,6 @@ static void pushBackAndCreateState(ColumnAggregateFunction::Container & data, Ar void ColumnAggregateFunction::insert(const Field & x) { - String type_string = getTypeString(); - if (x.getType() != Field::Types::AggregateFunctionState) throw Exception(String("Inserting field of type ") + x.getTypeName() + " into ColumnAggregateFunction. " "Expected " + Field::Types::toString(Field::Types::AggregateFunctionState), ErrorCodes::LOGICAL_ERROR); @@ -564,7 +603,7 @@ void ColumnAggregateFunction::getExtremes(Field & min, Field & max) const AggregateDataPtr place = place_buffer.data(); AggregateFunctionStateData serialized; - serialized.name = getTypeString(); + serialized.name = type_string; func->create(place); try diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 40f73665ebe..a9b3c38a2e0 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -74,6 +74,9 @@ private: /// Array of pointers to aggregation states, that are placed in arenas. Container data; + /// Name of the type to distinguish different aggregation states. + String type_string; + ColumnAggregateFunction() {} /// Create a new column that has another column as a source. @@ -84,29 +87,17 @@ private: /// but ownership of different elements cannot be mixed by different columns. 
void ensureOwnership(); - ColumnAggregateFunction(const AggregateFunctionPtr & func_) - : func(func_) - { - } + ColumnAggregateFunction(const AggregateFunctionPtr & func_); ColumnAggregateFunction(const AggregateFunctionPtr & func_, - const ConstArenas & arenas_) - : foreign_arenas(arenas_), func(func_) - { - } - + const ConstArenas & arenas_); ColumnAggregateFunction(const ColumnAggregateFunction & src_); - String getTypeString() const; - public: ~ColumnAggregateFunction() override; - void set(const AggregateFunctionPtr & func_) - { - func = func_; - } + void set(const AggregateFunctionPtr & func_); AggregateFunctionPtr getAggregateFunction() { return func; } AggregateFunctionPtr getAggregateFunction() const { return func; } @@ -121,6 +112,7 @@ public: std::string getName() const override { return "AggregateFunction(" + func->getName() + ")"; } const char * getFamilyName() const override { return "AggregateFunction"; } + TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; } MutableColumnPtr predictValues(Block & block, const ColumnNumbers & arguments, const Context & context) const; diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 55935a91cde..a20165826bb 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -52,6 +52,7 @@ public: std::string getName() const override; const char * getFamilyName() const override { return "Array"; } + TypeIndex getDataType() const override { return TypeIndex::Array; } MutableColumnPtr cloneResized(size_t size) const override; size_t size() const override; Field operator[](size_t n) const override; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 5fc96b14be8..02dfcc5b620 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -50,6 +50,11 @@ public: return "Const"; } + TypeIndex getDataType() const override + { + return data->getDataType(); + } + MutableColumnPtr cloneResized(size_t new_size) const override { return ColumnConst::create(data, new_size); diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 1c238cc6458..3e6fb833b56 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -333,17 +333,6 @@ void ColumnDecimal::getExtremes(Field & min, Field & max) const max = NearestFieldType(cur_max, scale); } -TypeIndex columnDecimalDataType(const IColumn * column) -{ - if (checkColumn>(column)) - return TypeIndex::Decimal32; - else if (checkColumn>(column)) - return TypeIndex::Decimal64; - else if (checkColumn>(column)) - return TypeIndex::Decimal128; - return TypeIndex::Nothing; -} - template class ColumnDecimal; template class ColumnDecimal; template class ColumnDecimal; diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 16c6a47c30a..37d85b05d4c 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -81,6 +81,7 @@ private: public: const char * getFamilyName() const override { return TypeName::get(); } + TypeIndex getDataType() const override { return TypeId::value; } bool isNumeric() const override { return false; } bool canBeInsideNullable() const override { return true; } @@ -197,6 +198,4 @@ ColumnPtr ColumnDecimal::indexImpl(const PaddedPODArray & indexes, size return res; } -TypeIndex columnDecimalDataType(const IColumn * column); - } diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 996a1f99ef1..6b7f1ecf793 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -43,6 +43,7 @@ 
private: public: std::string getName() const override { return "FixedString(" + std::to_string(n) + ")"; } const char * getFamilyName() const override { return "FixedString"; } + TypeIndex getDataType() const override { return TypeIndex::FixedString; } MutableColumnPtr cloneResized(size_t size) const override; diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 31cb8708a6e..267f3c7285a 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -29,6 +29,7 @@ private: public: const char * getFamilyName() const override { return "Function"; } + TypeIndex getDataType() const override { return TypeIndex::Function; } MutableColumnPtr cloneResized(size_t size) const override; diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 905d15f8167..1e6319a2cb1 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -39,6 +39,7 @@ public: std::string getName() const override { return "ColumnLowCardinality"; } const char * getFamilyName() const override { return "ColumnLowCardinality"; } + TypeIndex getDataType() const override { return TypeIndex::LowCardinality; } ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); } ColumnPtr convertToFullColumnIfLowCardinality() const override { return convertToFullColumn(); } diff --git a/src/Columns/ColumnNothing.h b/src/Columns/ColumnNothing.h index 691143e2c15..c2738bb4cdc 100644 --- a/src/Columns/ColumnNothing.h +++ b/src/Columns/ColumnNothing.h @@ -21,6 +21,7 @@ private: public: const char * getFamilyName() const override { return "Nothing"; } MutableColumnPtr cloneDummy(size_t s_) const override { return ColumnNothing::create(s_); } + TypeIndex getDataType() const override { return TypeIndex::Nothing; } bool canBeInsideNullable() const override { return true; } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 2cd8ff9f40f..a8f226ed37d 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -45,6 +45,7 @@ public: const char * getFamilyName() const override { return "Nullable"; } std::string getName() const override { return "Nullable(" + nested_column->getName() + ")"; } + TypeIndex getDataType() const override { return TypeIndex::Nullable; } MutableColumnPtr cloneResized(size_t size) const override; size_t size() const override { return nested_column->size(); } bool isNullAt(size_t n) const override { return assert_cast(*null_map).getData()[n] != 0;} diff --git a/src/Columns/ColumnSet.h b/src/Columns/ColumnSet.h index b30ba86fafe..316f8196e5a 100644 --- a/src/Columns/ColumnSet.h +++ b/src/Columns/ColumnSet.h @@ -25,6 +25,7 @@ private: public: const char * getFamilyName() const override { return "Set"; } + TypeIndex getDataType() const override { return TypeIndex::Set; } MutableColumnPtr cloneDummy(size_t s_) const override { return ColumnSet::create(s_, data); } ConstSetPtr getData() const { return data; } diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index a0b3d259b67..f067bce47bc 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -56,6 +56,7 @@ private: public: const char * getFamilyName() const override { return "String"; } + TypeIndex getDataType() const override { return TypeIndex::String; } size_t size() const override { diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 69b18e2fc0f..33c48a0cdd1 100644 --- a/src/Columns/ColumnTuple.h +++ 
b/src/Columns/ColumnTuple.h @@ -40,6 +40,7 @@ public: std::string getName() const override; const char * getFamilyName() const override { return "Tuple"; } + TypeIndex getDataType() const override { return TypeIndex::Tuple; } MutableColumnPtr cloneEmpty() const override; MutableColumnPtr cloneResized(size_t size) const override; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 95efd0dedad..50f1dba4fdb 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -289,13 +289,6 @@ void ColumnVector::updatePermutation(bool reverse, size_t limit, int nan_dire equal_range = std::move(new_ranges); } - -template -const char * ColumnVector::getFamilyName() const -{ - return TypeName::get(); -} - template MutableColumnPtr ColumnVector::cloneResized(size_t size) const { @@ -517,33 +510,6 @@ void ColumnVector::getExtremes(Field & min, Field & max) const max = NearestFieldType(cur_max); } -TypeIndex columnVectorDataType(const IColumn * column) -{ - if (checkColumn>(column)) - return TypeIndex::UInt8; - else if (checkColumn>(column)) - return TypeIndex::UInt16; - else if (checkColumn>(column)) - return TypeIndex::UInt32; - else if (checkColumn>(column)) - return TypeIndex::UInt64; - else if (checkColumn>(column)) - return TypeIndex::Int8; - else if (checkColumn>(column)) - return TypeIndex::Int16; - else if (checkColumn>(column)) - return TypeIndex::Int32; - else if (checkColumn>(column)) - return TypeIndex::Int64; - else if (checkColumn>(column)) - return TypeIndex::Int128; - else if (checkColumn>(column)) - return TypeIndex::Float32; - else if (checkColumn>(column)) - return TypeIndex::Float64; - return TypeIndex::Nothing; -} - /// Explicit template instantiations - to avoid code bloat in headers. template class ColumnVector; template class ColumnVector; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 5e934b42df0..b9b14f4b2a1 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -199,7 +199,8 @@ public: data.reserve(n); } - const char * getFamilyName() const override; + const char * getFamilyName() const override { return TypeName::get(); } + TypeIndex getDataType() const override { return TypeId::value; } MutableColumnPtr cloneResized(size_t size) const override; @@ -320,6 +321,4 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ return res; } -TypeIndex columnVectorDataType(const IColumn * column); - } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 1d92ed1c3ab..c227ec97e3a 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -51,6 +51,9 @@ public: /// Name of a Column kind, without parameters (example: FixedString, Array). virtual const char * getFamilyName() const = 0; + /// Type of data that column contains. It's an underlying type: UInt16 for Date, UInt32 for DateTime, so on. + virtual TypeIndex getDataType() const = 0; + /** If column isn't constant, returns itself. * If column is constant, transforms constant to full column (if column type allows such transform) and return it. 
*/ diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index af5d9878a3b..693ed18b87e 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -66,6 +66,7 @@ public: virtual UInt128 getHash() const = 0; const char * getFamilyName() const override { return "ColumnUnique"; } + TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); } void insert(const Field &) override { diff --git a/src/Common/Arena.h b/src/Common/Arena.h index f1d42e53345..d203a92d4a3 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -150,7 +150,7 @@ public: return res; } - /// Get peice of memory with alignment + /// Get piece of memory with alignment char * alignedAlloc(size_t size, size_t alignment) { do diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index 44e74fb30b5..a7914fb17ec 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -7,12 +7,11 @@ set (SRCS add_library(clickhouse_common_config ${SRCS}) -target_include_directories(clickhouse_common_config PUBLIC ${DBMS_INCLUDE_DIR}) target_link_libraries(clickhouse_common_config PUBLIC + clickhouse_common_zookeeper common Poco::XML PRIVATE - clickhouse_common_zookeeper string_utils ) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index c5b4fd0c585..2f530f2f2de 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -4,20 +4,32 @@ #include #include #include +#include #include #include #include -#include -#include #include #include #include +#include namespace ProfileEvents { extern Event DNSError; } +namespace std +{ +template<> struct hash +{ + size_t operator()(const Poco::Net::IPAddress & address) const noexcept + { + std::string_view addr(static_cast(address.addr()), address.length()); + std::hash hash_impl; + return hash_impl(addr); + } +}; +} namespace DB { @@ -25,6 +37,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int DNS_ERROR; } @@ -76,16 +89,48 @@ static void splitHostAndPort(const std::string & host_and_port, std::string & ou } } -static Poco::Net::IPAddress resolveIPAddressImpl(const std::string & host) +static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) { + Poco::Net::IPAddress ip; + /// NOTE: Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2 - /// Therefore we use SocketAddress constructor with dummy port to resolve IP - return Poco::Net::SocketAddress(host, 0U).host(); + if (Poco::Net::IPAddress::tryParse(host, ip)) + return DNSResolver::IPAddresses(1, ip); + + /// Family: AF_UNSPEC + /// AI_ALL is required for checking if client is allowed to connect from an address + auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL; + /// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured. + /// It should not affect client address checking, since client cannot connect from IPv6 address + /// if server has no IPv6 addresses. 
+ flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG; +#if defined(ARCADIA_BUILD) + auto addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); +#else + auto addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); +#endif + if (addresses.empty()) + throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR); + + return addresses; +} + +static String reverseResolveImpl(const Poco::Net::IPAddress & address) +{ + Poco::Net::SocketAddress sock_addr(address, 0); + + /// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...) + char host[1024]; + int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD); + if (err) + throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR); + return host; } struct DNSResolver::Impl { SimpleCache cache_host; + SimpleCache cache_address; std::mutex drop_mutex; std::mutex update_mutex; @@ -95,18 +140,25 @@ struct DNSResolver::Impl /// Store hosts, which was asked to resolve from last update of DNS cache. NameSet new_hosts; + std::unordered_set new_addresses; /// Store all hosts, which was whenever asked to resolve NameSet known_hosts; + std::unordered_set known_addresses; /// If disabled, will not make cache lookups, will resolve addresses manually on each call std::atomic disable_cache{false}; }; -DNSResolver::DNSResolver() : impl(std::make_unique()) {} +DNSResolver::DNSResolver() : impl(std::make_unique()), log(&Poco::Logger::get("DNSResolver")) {} Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) +{ + return resolveHostAll(host).front(); +} + +DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) { if (impl->disable_cache) return resolveIPAddressImpl(host); @@ -125,7 +177,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host), port); + return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -134,17 +186,29 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host), port); + return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); +} + +String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) +{ + if (impl->disable_cache) + return reverseResolveImpl(address); + + addToNewAddresses(address); + return impl->cache_address(address); } void DNSResolver::dropCache() { impl->cache_host.drop(); + impl->cache_address.drop(); std::scoped_lock lock(impl->update_mutex, impl->drop_mutex); impl->known_hosts.clear(); + impl->known_addresses.clear(); impl->new_hosts.clear(); + impl->new_addresses.clear(); impl->host_name.reset(); } @@ -166,34 +230,27 @@ String DNSResolver::getHostName() return *impl->host_name; } -bool DNSResolver::updateCache() +static const String & cacheElemToString(const String & str) { return str; } +static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr.toString(); } + +template +bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg) { - { - std::lock_guard lock(impl->drop_mutex); - for 
(const auto & host : impl->new_hosts) - impl->known_hosts.insert(host); - impl->new_hosts.clear(); - - impl->host_name.emplace(Poco::Net::DNS::hostName()); - } - - std::lock_guard lock(impl->update_mutex); - bool updated = false; - String lost_hosts; - for (const auto & host : impl->known_hosts) + String lost_elems; + for (const auto & elem : elems) { try { - updated |= updateHost(host); + updated |= (this->*update_func)(elem); } catch (const Poco::Net::NetException &) { ProfileEvents::increment(ProfileEvents::DNSError); - if (!lost_hosts.empty()) - lost_hosts += ", "; - lost_hosts += host; + if (!lost_elems.empty()) + lost_elems += ", "; + lost_elems += cacheElemToString(elem); } catch (...) { @@ -201,12 +258,41 @@ bool DNSResolver::updateCache() } } - if (!lost_hosts.empty()) - LOG_INFO(&Poco::Logger::get("DNSResolver"), "Cached hosts not found: {}", lost_hosts); + if (!lost_elems.empty()) + LOG_INFO(log, log_msg, lost_elems); return updated; } +bool DNSResolver::updateCache() +{ + LOG_DEBUG(log, "Updating DNS cache"); + + { + std::lock_guard lock(impl->drop_mutex); + + for (const auto & host : impl->new_hosts) + impl->known_hosts.insert(host); + impl->new_hosts.clear(); + + for (const auto & address : impl->new_addresses) + impl->known_addresses.insert(address); + impl->new_addresses.clear(); + + impl->host_name.emplace(Poco::Net::DNS::hostName()); + } + + /// FIXME Updating may take a long time becouse we cannot manage timeouts of getaddrinfo(...) and getnameinfo(...). + /// DROP DNS CACHE will wait on update_mutex (possibly while holding drop_mutex) + std::lock_guard lock(impl->update_mutex); + + bool hosts_updated = updateCacheImpl(&DNSResolver::updateHost, impl->known_hosts, "Cached hosts not found: {}"); + updateCacheImpl(&DNSResolver::updateAddress, impl->known_addresses, "Cached addresses not found: {}"); + + LOG_DEBUG(log, "Updated DNS cache"); + return hosts_updated; +} + bool DNSResolver::updateHost(const String & host) { /// Usage of updateHost implies that host is already in cache and there is no extra computations @@ -215,12 +301,25 @@ bool DNSResolver::updateHost(const String & host) return old_value != impl->cache_host(host); } +bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address) +{ + auto old_value = impl->cache_address(address); + impl->cache_address.update(address); + return old_value == impl->cache_address(address); +} + void DNSResolver::addToNewHosts(const String & host) { std::lock_guard lock(impl->drop_mutex); impl->new_hosts.insert(host); } +void DNSResolver::addToNewAddresses(const Poco::Net::IPAddress & address) +{ + std::lock_guard lock(impl->drop_mutex); + impl->new_addresses.insert(address); +} + DNSResolver::~DNSResolver() = default; DNSResolver & DNSResolver::instance() diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 7dfbe49ab77..7dbc2852d43 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -16,18 +17,26 @@ namespace DB class DNSResolver : private boost::noncopyable { public: + typedef std::vector IPAddresses; + static DNSResolver & instance(); DNSResolver(const DNSResolver &) = delete; - /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolve its IP + /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves its IP Poco::Net::IPAddress resolveHost(const std::string & host); - /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolve its IP and port + 
/// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs + IPAddresses resolveHostAll(const std::string & host); + + /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port Poco::Net::SocketAddress resolveAddress(const std::string & host_and_port); Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port); + /// Accepts host IP and resolves its host name + String reverseResolve(const Poco::Net::IPAddress & address); + /// Get this server host name String getHostName(); @@ -44,16 +53,21 @@ public: ~DNSResolver(); private: + template + bool updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg); DNSResolver(); struct Impl; std::unique_ptr impl; + Poco::Logger * log; - /// Returns true if IP of host has been changed. + /// Updates cached value and returns true it has been changed. bool updateHost(const String & host); + bool updateAddress(const Poco::Net::IPAddress & address); void addToNewHosts(const String & host); + void addToNewAddresses(const Poco::Net::IPAddress & address); }; } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 961130d320f..694f0979f63 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -341,7 +341,6 @@ namespace ErrorCodes extern const int OUTPUT_IS_NOT_SORTED = 365; extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT = 366; extern const int TOO_MANY_FETCHES = 367; - extern const int BAD_CAST = 368; extern const int ALL_REPLICAS_ARE_STALE = 369; extern const int DATA_TYPE_CANNOT_BE_USED_IN_TABLES = 370; extern const int INCONSISTENT_CLUSTER_DEFINITION = 371; @@ -398,7 +397,6 @@ namespace ErrorCodes extern const int CANNOT_GETTIMEOFDAY = 423; extern const int CANNOT_LINK = 424; extern const int SYSTEM_ERROR = 425; - extern const int NULL_POINTER_DEREFERENCE = 426; extern const int CANNOT_COMPILE_REGEXP = 427; extern const int UNKNOWN_LOG_LEVEL = 428; extern const int FAILED_TO_GETPWUID = 429; @@ -458,7 +456,6 @@ namespace ErrorCodes extern const int TOO_MANY_REDIRECTS = 483; extern const int INTERNAL_REDIS_ERROR = 484; extern const int SCALAR_ALREADY_EXISTS = 485; - extern const int UNKNOWN_SCALAR = 486; extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 487; extern const int UNKNOWN_DICTIONARY = 488; extern const int INCORRECT_DICTIONARY_DEFINITION = 489; diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index b0c897127c6..f2470ea0406 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -85,31 +86,6 @@ std::string Exception::getStackTraceString() const } -std::string errnoToString(int code, int the_errno) -{ - const size_t buf_size = 128; - char buf[buf_size]; -#ifndef _GNU_SOURCE - int rc = strerror_r(the_errno, buf, buf_size); -#ifdef __APPLE__ - if (rc != 0 && rc != EINVAL) -#else - if (rc != 0) -#endif - { - std::string tmp = std::to_string(code); - const char * code_str = tmp.c_str(); - const char * unknown_message = "Unknown error "; - strcpy(buf, unknown_message); - strcpy(buf + strlen(unknown_message), code_str); - } - return "errno: " + toString(the_errno) + ", strerror: " + std::string(buf); -#else - (void)code; - return "errno: " + toString(the_errno) + ", strerror: " + std::string(strerror_r(the_errno, buf, sizeof(buf))); -#endif -} - void throwFromErrno(const std::string & s, int code, int the_errno) { throw ErrnoException(s + ", " + errnoToString(code, 
the_errno), code, the_errno); diff --git a/src/Common/Exception.h b/src/Common/Exception.h index de63f35f463..763b90048bb 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -8,6 +8,8 @@ #include +#include + namespace Poco { class Logger; } @@ -20,8 +22,14 @@ public: Exception() = default; Exception(const std::string & msg, int code); - enum CreateFromPocoTag { CreateFromPoco }; - enum CreateFromSTDTag { CreateFromSTD }; + // Format message with fmt::format, like the logging functions. + template + Exception(int code, Fmt&&... fmt) + : Exception(fmt::format(std::forward(fmt)...), code) + {} + + struct CreateFromPocoTag {}; + struct CreateFromSTDTag {}; Exception(CreateFromPocoTag, const Poco::Exception & exc); Exception(CreateFromSTDTag, const std::exception & exc); @@ -73,7 +81,6 @@ private: using Exceptions = std::vector; -std::string errnoToString(int code, int the_errno = errno); [[noreturn]] void throwFromErrno(const std::string & s, int code, int the_errno = errno); /// Useful to produce some extra information about available space and inodes on device [[noreturn]] void throwFromErrnoWithPath(const std::string & s, const std::string & path, int code, diff --git a/src/Common/PipeFDs.cpp b/src/Common/PipeFDs.cpp index 1f57234534f..d91917c23a4 100644 --- a/src/Common/PipeFDs.cpp +++ b/src/Common/PipeFDs.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index a75339a644d..8393ea85112 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -180,6 +180,25 @@ M(OSWriteBytes, "Number of bytes written to disks or block devices. Doesn't include bytes that are in page cache dirty pages. May not include data that was written by OS asynchronously.") \ M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \ M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \ + \ + M(PerfCpuCycles, "Total cycles. Be wary of what happens during CPU frequency scaling.") \ + M(PerfInstructions, "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.") \ + M(PerfCacheReferences, "Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.") \ + M(PerfCacheMisses, "Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in con‐junction with the PERFCOUNTHWCACHEREFERENCES event to calculate cache miss rates.") \ + M(PerfBranchInstructions, "Retired branch instructions. Prior to Linux 2.6.35, this used the wrong event on AMD processors.") \ + M(PerfBranchMisses, "Mispredicted branch instructions.") \ + M(PerfBusCycles, "Bus cycles, which can be different from total cycles.") \ + M(PerfStalledCyclesFrontend, "Stalled cycles during issue.") \ + M(PerfStalledCyclesBackend, "Stalled cycles during retirement.") \ + M(PerfRefCpuCycles, "Total cycles; not affected by CPU frequency scaling.") \ + \ + M(PerfCpuClock, "The CPU clock, a high-resolution per-CPU timer") \ + M(PerfTaskClock, "A clock count specific to the task that is running") \ + M(PerfContextSwitches, "Number of context switches") \ + M(PerfCpuMigrations, "Number of times the process has migrated to a new CPU") \ + M(PerfAlignmentFaults, "Number of alignment faults. 
These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \ + M(PerfEmulationFaults, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \ + \ M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \ \ M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \ diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index a8b7d51a260..c4c7d21314d 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 1b97ed5689c..53ab2301a0a 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index 758f500e9d2..d228fdb42b6 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include diff --git a/src/Common/StringUtils/CMakeLists.txt b/src/Common/StringUtils/CMakeLists.txt index c63e0f260ba..bd1282a08d5 100644 --- a/src/Common/StringUtils/CMakeLists.txt +++ b/src/Common/StringUtils/CMakeLists.txt @@ -6,4 +6,3 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_common_stringutils .) add_library(string_utils ${clickhouse_common_stringutils_headers} ${clickhouse_common_stringutils_sources}) -target_include_directories (string_utils PRIVATE ${DBMS_INCLUDE_DIR}) diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 42452bf590b..fdc27f7efa3 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -4,9 +4,22 @@ #include "TaskStatsInfoGetter.h" #include "ProcfsMetricsProvider.h" +#include "hasLinuxCapability.h" +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { @@ -104,6 +117,404 @@ void TasksStatsCounters::incrementProfileEvents(const ::taskstats & prev, const profile_events.increment(ProfileEvents::OSReadBytes, safeDiff(prev.read_bytes, curr.read_bytes)); profile_events.increment(ProfileEvents::OSWriteBytes, safeDiff(prev.write_bytes, curr.write_bytes)); } + +} + +#endif + +#if defined(__linux__) && !defined(ARCADIA_BUILD) + +namespace DB +{ + +thread_local PerfEventsCounters current_thread_counters; + +#define SOFTWARE_EVENT(PERF_NAME, LOCAL_NAME) \ + PerfEventInfo \ + { \ + .event_type = perf_type_id::PERF_TYPE_SOFTWARE, \ + .event_config = (PERF_NAME), \ + .profile_event = ProfileEvents::LOCAL_NAME, \ + .settings_name = #LOCAL_NAME \ + } + +#define HARDWARE_EVENT(PERF_NAME, LOCAL_NAME) \ + PerfEventInfo \ + { \ + .event_type = perf_type_id::PERF_TYPE_HARDWARE, \ + .event_config = (PERF_NAME), \ + .profile_event = ProfileEvents::LOCAL_NAME, \ + .settings_name = #LOCAL_NAME \ + } + +// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html +static const PerfEventInfo raw_events_info[] = { + HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles), + HARDWARE_EVENT(PERF_COUNT_HW_INSTRUCTIONS, PerfInstructions), + 
HARDWARE_EVENT(PERF_COUNT_HW_CACHE_REFERENCES, PerfCacheReferences), + HARDWARE_EVENT(PERF_COUNT_HW_CACHE_MISSES, PerfCacheMisses), + HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PerfBranchInstructions), + HARDWARE_EVENT(PERF_COUNT_HW_BRANCH_MISSES, PerfBranchMisses), + HARDWARE_EVENT(PERF_COUNT_HW_BUS_CYCLES, PerfBusCycles), + HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, PerfStalledCyclesFrontend), + HARDWARE_EVENT(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, PerfStalledCyclesBackend), + HARDWARE_EVENT(PERF_COUNT_HW_REF_CPU_CYCLES, PerfRefCpuCycles), + // `cpu-clock` is a bit broken according to this: https://stackoverflow.com/a/56967896 + SOFTWARE_EVENT(PERF_COUNT_SW_CPU_CLOCK, PerfCpuClock), + SOFTWARE_EVENT(PERF_COUNT_SW_TASK_CLOCK, PerfTaskClock), + SOFTWARE_EVENT(PERF_COUNT_SW_CONTEXT_SWITCHES, PerfContextSwitches), + SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations), + SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults), + SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults) +}; + +#undef HARDWARE_EVENT +#undef SOFTWARE_EVENT + +// A map of event name -> event index, to parse event list in settings. +static std::unordered_map populateEventMap() +{ + std::unordered_map name_to_index; + name_to_index.reserve(NUMBER_OF_RAW_EVENTS); + + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + name_to_index.emplace(raw_events_info[i].settings_name, i); + } + + return name_to_index; +} + +static const auto event_name_to_index = populateEventMap(); + +static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, UInt64 flags) +{ + return static_cast(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags)); +} + +static int openPerfEventDisabled(Int32 perf_event_paranoid, bool has_cap_sys_admin, UInt32 perf_event_type, UInt64 perf_event_config) +{ + perf_event_attr pe{}; + pe.type = perf_event_type; + pe.size = sizeof(struct perf_event_attr); + pe.config = perf_event_config; + // disable by default to add as little extra time as possible + pe.disabled = 1; + // can record kernel only when `perf_event_paranoid` <= 1 or have CAP_SYS_ADMIN + pe.exclude_kernel = perf_event_paranoid >= 2 && !has_cap_sys_admin; + pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; + + return openPerfEvent(&pe, /* measure the calling thread */ 0, /* on any cpu */ -1, -1, 0); +} + +static void enablePerfEvent(int event_fd) +{ + if (ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0)) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Can't enable perf event with file descriptor {}: '{}' ({})", + event_fd, strerror(errno), errno); + } +} + +static void disablePerfEvent(int event_fd) +{ + if (ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0)) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Can't disable perf event with file descriptor {}: '{}' ({})", + event_fd, strerror(errno), errno); + } +} + +static void releasePerfEvent(int event_fd) +{ + if (close(event_fd)) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Can't close perf event file descriptor {}: {} ({})", + event_fd, strerror(errno), errno); + } +} + +static bool validatePerfEventDescriptor(int & fd) +{ + if (fcntl(fd, F_GETFL) != -1) + return true; + + if (errno == EBADF) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Event descriptor {} was closed from the outside; reopening", fd); + } + else + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Error while checking availability of event descriptor {}: {} ({})", + fd, strerror(errno), 
errno); + + disablePerfEvent(fd); + releasePerfEvent(fd); + } + + fd = -1; + return false; +} + +bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_events_list) +{ + const auto valid_event_indices = eventIndicesFromString(needed_events_list); + + // find state changes (if there are any) + bool old_state[NUMBER_OF_RAW_EVENTS]; + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + old_state[i] = thread_events_descriptors_holder.descriptors[i] != -1; + + bool new_state[NUMBER_OF_RAW_EVENTS]; + std::fill_n(new_state, NUMBER_OF_RAW_EVENTS, false); + for (size_t opened_index : valid_event_indices) + new_state[opened_index] = true; + + std::vector events_to_open; + std::vector events_to_release; + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + bool old_one = old_state[i]; + bool new_one = new_state[i]; + + if (old_one == new_one) + { + if (old_one + && !validatePerfEventDescriptor( + thread_events_descriptors_holder.descriptors[i])) + { + events_to_open.push_back(i); + } + continue; + } + + if (new_one) + events_to_open.push_back(i); + else + events_to_release.push_back(i); + } + + // release unused descriptors + for (size_t i : events_to_release) + { + int & fd = thread_events_descriptors_holder.descriptors[i]; + disablePerfEvent(fd); + releasePerfEvent(fd); + fd = -1; + } + + if (events_to_open.empty()) + { + return true; + } + + // check permissions + // cat /proc/sys/kernel/perf_event_paranoid + // -1: Allow use of (almost) all events by all users + // >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK + // >=1: Disallow CPU event access by users without CAP_SYS_ADMIN + // >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN + // >=3: Disallow all event access by users without CAP_SYS_ADMIN + Int32 perf_event_paranoid = 0; + std::ifstream paranoid_file("/proc/sys/kernel/perf_event_paranoid"); + paranoid_file >> perf_event_paranoid; + + bool has_cap_sys_admin = hasLinuxCapability(CAP_SYS_ADMIN); + if (perf_event_paranoid >= 3 && !has_cap_sys_admin) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Not enough permissions to record perf events: " + "perf_event_paranoid = {} and CAP_SYS_ADMIN = 0", + perf_event_paranoid); + return false; + } + + // Open descriptors for new events. + // Theoretically, we can run out of file descriptors. Threads go up to 10k, + // and there might be a dozen perf events per thread, so we're looking at + // 100k open files. In practice, this is not likely -- perf events are + // mostly used in performance tests or other kinds of testing, and the + // number of threads stays below hundred. + // We used to check the number of open files by enumerating /proc/self/fd, + // but listing all open files before opening more files is obviously + // quadratic, and quadraticity never ends well. + for (size_t i : events_to_open) + { + const PerfEventInfo & event_info = raw_events_info[i]; + int & fd = thread_events_descriptors_holder.descriptors[i]; + // disable by default to add as little extra time as possible + fd = openPerfEventDisabled(perf_event_paranoid, has_cap_sys_admin, event_info.event_type, event_info.event_config); + + if (fd == -1 && errno != ENOENT) + { + // ENOENT means that the event is not supported. Don't log it, because + // this is called for each thread and would be too verbose. Log other + // error codes because they might signify an error. 
+ LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Failed to open perf event {} (event_type={}, event_config={}): " + "'{}' ({})", event_info.settings_name, event_info.event_type, + event_info.event_config, strerror(errno), errno); + } + } + + return true; +} + +// Parse comma-separated list of event names. Empty means all available +// events. +std::vector PerfEventsCounters::eventIndicesFromString(const std::string & events_list) +{ + std::vector result; + result.reserve(NUMBER_OF_RAW_EVENTS); + + if (events_list.empty()) + { + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + result.push_back(i); + } + return result; + } + + std::istringstream iss(events_list); + std::string event_name; + while (std::getline(iss, event_name, ',')) + { + // Allow spaces at the beginning of the token, so that you can write + // 'a, b'. + event_name.erase(0, event_name.find_first_not_of(' ')); + + auto entry = event_name_to_index.find(event_name); + if (entry != event_name_to_index.end()) + { + result.push_back(entry->second); + } + else + { + LOG_ERROR(&Poco::Logger::get("PerfEvents"), + "Unknown perf event name '{}' specified in settings", event_name); + } + } + + return result; +} + +void PerfEventsCounters::initializeProfileEvents(const std::string & events_list) +{ + if (!processThreadLocalChanges(events_list)) + return; + + for (int fd : thread_events_descriptors_holder.descriptors) + { + if (fd == -1) + continue; + + // We don't reset the event, because the time_running and time_enabled + // can't be reset anyway and we have to calculate deltas. + enablePerfEvent(fd); + } +} + +void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile_events) +{ + // Disable all perf events. + for (auto fd : thread_events_descriptors_holder.descriptors) + { + if (fd == -1) + continue; + disablePerfEvent(fd); + } + + // Read the counter values. + PerfEventValue current_values[NUMBER_OF_RAW_EVENTS]; + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + int fd = thread_events_descriptors_holder.descriptors[i]; + if (fd == -1) + continue; + + constexpr ssize_t bytes_to_read = sizeof(current_values[0]); + const int bytes_read = read(fd, ¤t_values[i], bytes_to_read); + + if (bytes_read != bytes_to_read) + { + LOG_WARNING(&Poco::Logger::get("PerfEvents"), + "Can't read event value from file descriptor {}: '{}' ({})", + fd, strerror(errno), errno); + current_values[i] = {}; + } + } + + // actually process counters' values + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) + { + int fd = thread_events_descriptors_holder.descriptors[i]; + if (fd == -1) + continue; + + const PerfEventInfo & info = raw_events_info[i]; + const PerfEventValue & previous_value = previous_values[i]; + const PerfEventValue & current_value = current_values[i]; + + // Account for counter multiplexing. time_running and time_enabled are + // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate + // deltas from old values. + const UInt64 delta = (current_value.value - previous_value.value) + * (current_value.time_enabled - previous_value.time_enabled) + / std::max(1.f, + float(current_value.time_running - previous_value.time_running)); + + profile_events.increment(info.profile_event, delta); + } + + // Store current counter values for the next profiling period. 
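
For context, a brief illustration of the multiplexing correction computed just above (a sketch, not part of the patch). With read_format set to PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING and no group or ID flags, a read() on one of these descriptors fills exactly the three fields that PerfEventValue mirrors:

    // Hypothetical struct name; field layout as documented in perf_event_open(2).
    struct ReadFormatSketch
    {
        uint64_t value;         // raw counter value
        uint64_t time_enabled;  // nanoseconds the event was enabled
        uint64_t time_running;  // nanoseconds the event was actually scheduled on a hardware counter
    };

When the kernel multiplexes more events than there are hardware counters, time_running lags behind time_enabled, so the raw delta is scaled up by enabled/running. For example, if value grew by 1000 while time_enabled advanced 10 ms but time_running only 5 ms, the event was counted for half of the period and the reported delta becomes 1000 * 10 / 5 = 2000. The memcpy that follows then snapshots the current readings as the baseline for the next period.
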
+ memcpy(previous_values, current_values, sizeof(current_values)); +} + +void PerfEventsCounters::closeEventDescriptors() +{ + thread_events_descriptors_holder.releaseResources(); +} + +PerfDescriptorsHolder::PerfDescriptorsHolder() +{ + for (int & descriptor : descriptors) + descriptor = -1; +} + +PerfDescriptorsHolder::~PerfDescriptorsHolder() +{ + releaseResources(); +} + +void PerfDescriptorsHolder::releaseResources() +{ + for (int & descriptor : descriptors) + { + if (descriptor == -1) + continue; + + disablePerfEvent(descriptor); + releasePerfEvent(descriptor); + descriptor = -1; + } +} + +} + +#else + +namespace DB +{ + +// Not on Linux or in Arcadia: the functionality is disabled. +PerfEventsCounters current_thread_counters; + } #endif diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index 038e04c4955..b6281234214 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -5,6 +5,7 @@ #include #include #include +#include #if defined(__linux__) @@ -34,6 +35,24 @@ namespace ProfileEvents extern const Event OSWriteChars; extern const Event OSReadBytes; extern const Event OSWriteBytes; + + extern const Event PerfCpuCycles; + extern const Event PerfInstructions; + extern const Event PerfCacheReferences; + extern const Event PerfCacheMisses; + extern const Event PerfBranchInstructions; + extern const Event PerfBranchMisses; + extern const Event PerfBusCycles; + extern const Event PerfStalledCyclesFrontend; + extern const Event PerfStalledCyclesBackend; + extern const Event PerfRefCpuCycles; + + extern const Event PerfCpuClock; + extern const Event PerfTaskClock; + extern const Event PerfContextSwitches; + extern const Event PerfCpuMigrations; + extern const Event PerfAlignmentFaults; + extern const Event PerfEmulationFaults; #endif } @@ -116,6 +135,78 @@ struct RUsageCounters } }; +// thread_local is disabled in Arcadia, so we have to use a dummy implementation +// there. +#if defined(__linux__) && !defined(ARCADIA_BUILD) + +struct PerfEventInfo +{ + // see perf_event.h/perf_type_id enum + int event_type; + // see configs in perf_event.h + int event_config; + ProfileEvents::Event profile_event; + std::string settings_name; +}; + +struct PerfEventValue +{ + UInt64 value = 0; + UInt64 time_enabled = 0; + UInt64 time_running = 0; +}; + +static constexpr size_t NUMBER_OF_RAW_EVENTS = 16; + +struct PerfDescriptorsHolder : boost::noncopyable +{ + int descriptors[NUMBER_OF_RAW_EVENTS]{}; + + PerfDescriptorsHolder(); + + ~PerfDescriptorsHolder(); + + void releaseResources(); +}; + +struct PerfEventsCounters +{ + PerfDescriptorsHolder thread_events_descriptors_holder; + + // time_enabled and time_running can't be reset, so we have to store the + // data from the previous profiling period and calculate deltas to them, + // to be able to properly account for counter multiplexing. + PerfEventValue previous_values[NUMBER_OF_RAW_EVENTS]{}; + + + void initializeProfileEvents(const std::string & events_list); + void finalizeProfileEvents(ProfileEvents::Counters & profile_events); + void closeEventDescriptors(); + bool processThreadLocalChanges(const std::string & needed_events_list); + + + static std::vector eventIndicesFromString(const std::string & events_list); +}; + +// Perf event creation is moderately heavy, so we create them once per thread and +// then reuse. +extern thread_local PerfEventsCounters current_thread_counters; + +#else + +// Not on Linux, or in Arcadia: the functionality is disabled. 
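
As a usage note before the non-Linux stub that follows: the counters above are intended to be driven once per query on each worker thread. A minimal sketch of that lifecycle, with hypothetical hook names (only current_thread_counters and its three methods come from the header above; the setting names follow the Settings.h changes later in this diff):

    #include <Common/ThreadProfileEvents.h>
    #include <Common/ProfileEvents.h>

    // Hypothetical per-thread hooks; the calls on current_thread_counters are the real API.
    void onQueryStart(const std::string & metrics_perf_events_list)
    {
        // Opens (or revalidates) the per-thread perf descriptors and enables them.
        DB::current_thread_counters.initializeProfileEvents(metrics_perf_events_list);
    }

    void onQueryFinish(ProfileEvents::Counters & profile_events)
    {
        // Disables the events, reads value/time_enabled/time_running and accounts the deltas.
        DB::current_thread_counters.finalizeProfileEvents(profile_events);
    }

    void onThreadExit()
    {
        // Optionally release the file descriptors when the thread is done.
        DB::current_thread_counters.closeEventDescriptors();
    }
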
+struct PerfEventsCounters +{ + void initializeProfileEvents(const std::string & /* events_list */) {} + void finalizeProfileEvents(ProfileEvents::Counters & /* profile_events */) {} + void closeEventDescriptors() {} +}; + +// thread_local is disabled in Arcadia, so we are going to use a static dummy. +extern PerfEventsCounters current_thread_counters; + +#endif + #if defined(__linux__) class TasksStatsCounters diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 682a4b0a412..ddb0b96df0e 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -1,6 +1,5 @@ #include -#include #include #include #include @@ -57,36 +56,6 @@ ThreadStatus::~ThreadStatus() current_thread = nullptr; } -void ThreadStatus::initPerformanceCounters() -{ - performance_counters_finalized = false; - - /// Clear stats from previous query if a new query is started - /// TODO: make separate query_thread_performance_counters and thread_performance_counters - performance_counters.resetCounters(); - memory_tracker.resetCounters(); - memory_tracker.setDescription("(for thread)"); - - query_start_time_nanoseconds = getCurrentTimeNanoseconds(); - query_start_time = time(nullptr); - ++queries_started; - - *last_rusage = RUsageCounters::current(query_start_time_nanoseconds); - if (!taskstats) - { - try - { - taskstats = TasksStatsCounters::create(thread_id); - } - catch (...) - { - tryLogCurrentException(log); - } - } - if (taskstats) - taskstats->reset(); -} - void ThreadStatus::updatePerformanceCounters() { try diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 11cd7628a7d..d0952c3ab28 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -33,6 +33,7 @@ class QueryProfilerCpu; class QueryThreadLog; class TasksStatsCounters; struct RUsageCounters; +struct PerfEventsCounters; class TaskStatsInfoGetter; class InternalTextLogsQueue; using InternalTextLogsQueuePtr = std::shared_ptr; diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index 90a75f1d9ec..ef32d9266c0 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -5,7 +5,6 @@ add_headers_and_sources(clickhouse_common_zookeeper .) 
add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common PRIVATE string_utils) -target_include_directories(clickhouse_common_zookeeper PUBLIC ${DBMS_INCLUDE_DIR}) if (ENABLE_TESTS) add_subdirectory (tests) diff --git a/src/Common/ZooKeeper/ZooKeeperHolder.cpp b/src/Common/ZooKeeper/ZooKeeperHolder.cpp index 41a36a51082..ea8a2017e37 100644 --- a/src/Common/ZooKeeper/ZooKeeperHolder.cpp +++ b/src/Common/ZooKeeper/ZooKeeperHolder.cpp @@ -5,7 +5,7 @@ namespace DB { namespace ErrorCodes { - extern const int NULL_POINTER_DEREFERENCE; + extern const int LOGICAL_ERROR; } } @@ -57,7 +57,7 @@ ZooKeeperHolder::UnstorableZookeeperHandler::UnstorableZookeeperHandler(ZooKeepe ZooKeeper * ZooKeeperHolder::UnstorableZookeeperHandler::operator->() { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::LOGICAL_ERROR); return zk_ptr.get(); } @@ -65,20 +65,20 @@ ZooKeeper * ZooKeeperHolder::UnstorableZookeeperHandler::operator->() const ZooKeeper * ZooKeeperHolder::UnstorableZookeeperHandler::operator->() const { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::LOGICAL_ERROR); return zk_ptr.get(); } ZooKeeper & ZooKeeperHolder::UnstorableZookeeperHandler::operator*() { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::LOGICAL_ERROR); return *zk_ptr; } const ZooKeeper & ZooKeeperHolder::UnstorableZookeeperHandler::operator*() const { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::LOGICAL_ERROR); return *zk_ptr; } diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index 7f9a19805bb..b70068b8e81 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -13,7 +13,7 @@ namespace DB { namespace ErrorCodes { - extern const int BAD_CAST; + extern const int LOGICAL_ERROR; } } @@ -41,11 +41,11 @@ To assert_cast(From && from) } catch (const std::exception & e) { - throw DB::Exception(e.what(), DB::ErrorCodes::BAD_CAST); + throw DB::Exception(e.what(), DB::ErrorCodes::LOGICAL_ERROR); } throw DB::Exception("Bad cast from type " + demangle(typeid(from).name()) + " to " + demangle(typeid(To).name()), - DB::ErrorCodes::BAD_CAST); + DB::ErrorCodes::LOGICAL_ERROR); #else return static_cast(from); #endif diff --git a/src/Common/tests/CMakeLists.txt b/src/Common/tests/CMakeLists.txt index b68e71c0b43..2653ab30c29 100644 --- a/src/Common/tests/CMakeLists.txt +++ b/src/Common/tests/CMakeLists.txt @@ -26,7 +26,6 @@ add_executable (int_hashes_perf int_hashes_perf.cpp) target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io) add_executable (simple_cache simple_cache.cpp) -target_include_directories (simple_cache PRIVATE ${DBMS_INCLUDE_DIR}) target_link_libraries (simple_cache PRIVATE common) add_executable (compact_array compact_array.cpp) diff --git a/src/Common/typeid_cast.h b/src/Common/typeid_cast.h index 29ad2e520c0..f28271fb53b 100644 --- a/src/Common/typeid_cast.h +++ b/src/Common/typeid_cast.h @@ -15,7 +15,7 @@ namespace DB { 
namespace ErrorCodes { - extern const int BAD_CAST; + extern const int LOGICAL_ERROR; } } @@ -34,11 +34,11 @@ std::enable_if_t, To> typeid_cast(From & from) } catch (const std::exception & e) { - throw DB::Exception(e.what(), DB::ErrorCodes::BAD_CAST); + throw DB::Exception(e.what(), DB::ErrorCodes::LOGICAL_ERROR); } throw DB::Exception("Bad cast from type " + demangle(typeid(from).name()) + " to " + demangle(typeid(To).name()), - DB::ErrorCodes::BAD_CAST); + DB::ErrorCodes::LOGICAL_ERROR); } @@ -54,7 +54,7 @@ std::enable_if_t, To> typeid_cast(From * from) } catch (const std::exception & e) { - throw DB::Exception(e.what(), DB::ErrorCodes::BAD_CAST); + throw DB::Exception(e.what(), DB::ErrorCodes::LOGICAL_ERROR); } } @@ -71,6 +71,6 @@ std::enable_if_t, To> typeid_cast(const std::shared_ptr } catch (const std::exception & e) { - throw DB::Exception(e.what(), DB::ErrorCodes::BAD_CAST); + throw DB::Exception(e.what(), DB::ErrorCodes::LOGICAL_ERROR); } } diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp index 2369e2ca232..6c7cf92a41d 100644 --- a/src/Compression/CompressionCodecDelta.cpp +++ b/src/Compression/CompressionCodecDelta.cpp @@ -166,6 +166,9 @@ void registerCodecDelta(CompressionCodecFactory & factory) const auto children = arguments->children; const auto * literal = children[0]->as(); + if (!literal) + throw Exception("Delta codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); + size_t user_bytes_size = literal->value.safeGet(); if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8) throw Exception("Delta value for delta codec can be 1, 2, 4 or 8, given " + toString(user_bytes_size), ErrorCodes::ILLEGAL_CODEC_PARAMETER); diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 95fa51d1bd0..19f2dc11e85 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -166,6 +166,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) throw Exception("Cannot compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(ValueType)), ErrorCodes::CANNOT_COMPRESS); const char * source_end = source + source_size; + const char * dest_start = dest; const UInt32 items_count = source_size / sizeof(ValueType); unalignedStore(dest, items_count); @@ -229,7 +230,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) writer.flush(); - return sizeof(items_count) + sizeof(prev_value) + sizeof(prev_delta) + writer.count() / 8; + return (dest - dest_start) + (writer.count() + 7) / 8; } template @@ -237,7 +238,6 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) { static_assert(is_unsigned_v, "ValueType must be unsigned."); using UnsignedDeltaType = ValueType; - using SignedDeltaType = typename std::make_signed::type; const char * source_end = source + source_size; @@ -286,12 +286,13 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (write_spec.data_bits != 0) { const UInt8 sign = reader.readBit(); - SignedDeltaType signed_dd = static_cast(reader.readBits(write_spec.data_bits - 1) + 1); + double_delta = reader.readBits(write_spec.data_bits - 1) + 1; if (sign) { + /// It's well defined for unsigned data types. + /// In contrast, it's undefined to do negation of the most negative signed number due to overflow. 
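
A quick worked example of why the unsigned formulation just mentioned is safe (an illustration, not part of the patch): for an N-bit unsigned type, -x is defined as 2^N - x, so adding the negated magnitude to prev_delta still yields the intended smaller delta modulo 2^N. With UInt8, for instance, a stored magnitude of 1 negates to 255, and prev_delta + 255 is congruent to prev_delta - 1 modulo 256. The old signed path instead hit undefined behaviour whenever the magnitude equalled the most negative representable value (e.g. -2^63 for Int64), since that number cannot be negated without overflow. The assignment on the next line applies exactly this unsigned negation.
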
+ double_delta = -double_delta; } - double_delta = static_cast(signed_dd); } const UnsignedDeltaType delta = double_delta + prev_delta; diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp index 5782da791a1..7ba128cfe4e 100644 --- a/src/Compression/CompressionCodecGorilla.cpp +++ b/src/Compression/CompressionCodecGorilla.cpp @@ -90,6 +90,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest, if (source_size % sizeof(T) != 0) throw Exception("Cannot compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS); const char * source_end = source + source_size; + const char * dest_start = dest; const char * dest_end = dest + dest_size; const UInt32 items_count = source_size / sizeof(T); @@ -145,7 +146,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest, writer.flush(); - return sizeof(items_count) + sizeof(prev_value) + writer.count() / 8; + return (dest - dest_start) + (writer.count() + 7) / 8; } template diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index cf8f8e976ea..32c3958e65e 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes { extern const int CANNOT_COMPRESS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; +extern const int ILLEGAL_CODEC_PARAMETER; } @@ -84,6 +85,9 @@ void registerCodecLZ4HC(CompressionCodecFactory & factory) const auto children = arguments->children; const auto * literal = children[0]->as(); + if (!literal) + throw Exception("LZ4HC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); + level = literal->value.safeGet(); } diff --git a/src/Compression/CompressionCodecZSTD.cpp b/src/Compression/CompressionCodecZSTD.cpp index a9dc5de59ad..f1030d87ddd 100644 --- a/src/Compression/CompressionCodecZSTD.cpp +++ b/src/Compression/CompressionCodecZSTD.cpp @@ -74,6 +74,9 @@ void registerCodecZSTD(CompressionCodecFactory & factory) const auto children = arguments->children; const auto * literal = children[0]->as(); + if (!literal) + throw Exception("ZSTD codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); + level = literal->value.safeGet(); if (level > ZSTD_maxCLevel()) throw Exception("ZSTD codec can't have level more that " + toString(ZSTD_maxCLevel()) + ", given " + toString(level), ErrorCodes::ILLEGAL_CODEC_PARAMETER); diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index 64e6051b8d5..3c7766ba508 100644 --- a/src/Compression/ICompressionCodec.cpp +++ b/src/Compression/ICompressionCodec.cpp @@ -21,6 +21,8 @@ namespace ErrorCodes UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const { + assert(source != nullptr && dest != nullptr); + dest[0] = getMethodByte(); UInt8 header_size = getHeaderSize(); /// Write data from header_size @@ -33,8 +35,9 @@ UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const { - UInt8 header_size = getHeaderSize(); + assert(source != nullptr && dest != nullptr); + UInt8 header_size = getHeaderSize(); if (source_size < header_size) throw Exception("Can't decompress data: the compressed data size (" + toString(source_size) + ", this should include header size) is less than the header size (" + 
toString(header_size) + ")", ErrorCodes::CORRUPTED_DATA); diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index b416b14efb4..dc33fc50252 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -220,7 +220,7 @@ template if (l_size != r_size) { - result = ::testing::AssertionFailure() << "size mismatch expected: " << l_size << " got:" << r_size; + result = ::testing::AssertionFailure() << "size mismatch, expected: " << l_size << " got:" << r_size; } if (l_size == 0 || r_size == 0) { @@ -403,11 +403,6 @@ CodecTestSequence generateSeq(Generator gen, const char* gen_name, B Begin = 0, { const T v = gen(static_cast(i)); -// if constexpr (debug_log_items) -// { -// std::cerr << "#" << i << " " << type_name() << "(" << sizeof(T) << " bytes) : " << v << std::endl; -// } - unalignedStore(write_pos, v); write_pos += sizeof(v); } @@ -483,6 +478,7 @@ void testTranscoding(Timer & timer, ICompressionCodec & codec, const CodecTestSe timer.start(); + assert(source_data.data() != nullptr); // Codec assumes that source buffer is not null. const UInt32 encoded_size = codec.compress(source_data.data(), source_data.size(), encoded.data()); timer.report("encoding"); @@ -800,7 +796,8 @@ std::vector generatePyramidOfSequences(const size_t sequences std::vector sequences; sequences.reserve(sequences_count); - sequences.push_back(makeSeq()); // sequence of size 0 + // Don't test against sequence of size 0, since it causes a nullptr source buffer as codec input and produces an error. + // sequences.push_back(makeSeq()); // sequence of size 0 for (size_t i = 1; i < sequences_count; ++i) { std::string name = generator_name + std::string(" from 0 to ") + std::to_string(i); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e1d64a783d3..cd9de5abec3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -28,6 +28,7 @@ class IColumn; /** Settings of query execution. + * These settings go to users.xml. */ struct Settings : public SettingsCollection { @@ -45,7 +46,7 @@ struct Settings : public SettingsCollection * A setting is "IMPORTANT" if it affects the results of queries and can't be ignored by older versions. */ -#define LIST_OF_SETTINGS(M) \ +#define COMMON_SETTINGS(M) \ M(SettingUInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(SettingUInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(SettingUInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ @@ -184,40 +185,10 @@ struct Settings : public SettingsCollection \ M(SettingString, count_distinct_implementation, "uniqExact", "What aggregate function to use for implementation of count(DISTINCT ...)", 0) \ \ - M(SettingBool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \ - M(SettingBool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \ - \ M(SettingBool, add_http_cors_header, false, "Write add http CORS header.", 0) \ \ M(SettingUInt64, max_http_get_redirects, 0, "Max number of http GET redirects hops allowed. 
Make sure additional security measures are in place to prevent a malicious server to redirect your requests to unexpected services.", 0) \ \ - M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \ - M(SettingBool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \ - M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ - M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \ - M(SettingBool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ - M(SettingBool, input_format_null_as_default, false, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ - \ - M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \ - M(SettingBool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ - M(SettingBool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ - M(SettingURI, format_avro_schema_registry_url, {}, "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ - \ - M(SettingBool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ - \ - M(SettingBool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \ - \ - M(SettingBool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \ - \ - M(SettingUInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ - M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ - M(SettingUInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ - M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \ - M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ - M(SettingString, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy'.", 0) \ - M(SettingUInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ - M(SettingBool, output_format_tsv_crlf_end_of_line, false, "If it is set true, end of line in TSV format will be \\r\\n instead of \\n.", 0) \ - \ M(SettingBool, use_client_time_zone, false, "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.", 0) \ \ M(SettingBool, send_progress_in_http_headers, false, "Send progress notifications using X-ClickHouse-Progress headers. Some clients do not support high amount of HTTP headers (Python requests in particular), so it is disabled by default.", 0) \ @@ -226,9 +197,6 @@ struct Settings : public SettingsCollection \ M(SettingBool, fsync_metadata, 1, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ - M(SettingUInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \ - M(SettingFloat, input_format_allow_errors_ratio, 0, "Maximum relative amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \ - \ M(SettingBool, join_use_nulls, 0, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ \ M(SettingJoinStrictness, join_default_strictness, JoinStrictness::ALL, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. 
If empty, query without strictness will throw exception.", 0) \ @@ -246,23 +214,6 @@ struct Settings : public SettingsCollection M(SettingMilliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \ M(SettingMilliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \ \ - M(SettingString, format_schema, "", "Schema identifier (used by schema-based formats)", 0) \ - M(SettingString, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \ - M(SettingString, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \ - M(SettingString, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \ - \ - M(SettingString, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \ - M(SettingString, format_custom_field_delimiter, "\t", "Delimiter between fields (for CustomSeparated format)", 0) \ - M(SettingString, format_custom_row_before_delimiter, "", "Delimiter before field of the first column (for CustomSeparated format)", 0) \ - M(SettingString, format_custom_row_after_delimiter, "\n", "Delimiter after field of the last column (for CustomSeparated format)", 0) \ - M(SettingString, format_custom_row_between_delimiter, "", "Delimiter between rows (for CustomSeparated format)", 0) \ - M(SettingString, format_custom_result_before_delimiter, "", "Prefix before result set (for CustomSeparated format)", 0) \ - M(SettingString, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \ - \ - M(SettingString, format_regexp, "", "Regular expression (for Regexp format)", 0) \ - M(SettingString, format_regexp_escaping_rule, "Escaped", "Field escaping rule (for Regexp format)", 0) \ - M(SettingBool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format", 0) \ - \ M(SettingBool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \ M(SettingSeconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \ M(SettingSeconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \ @@ -276,6 +227,8 @@ struct Settings : public SettingsCollection M(SettingUInt64, odbc_max_field_size, 1024, "Max size of filed can be read from ODBC dictionary. Long strings are truncated.", 0) \ M(SettingUInt64, query_profiler_real_time_period_ns, 1000000000, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(SettingUInt64, query_profiler_cpu_time_period_ns, 1000000000, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ + M(SettingBool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \ + M(SettingString, metrics_perf_events_list, "", "Comma separated list of perf metrics that will be measured throughout queries' execution. Empty means all events. 
See PerfEventInfo in sources for the available events.", 0) \ \ \ /** Limits during query execution are part of the settings. \ @@ -358,13 +311,7 @@ struct Settings : public SettingsCollection M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.", 0) \ M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.", 0)\ M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.", 0) \ - M(SettingChar, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \ - M(SettingBool, format_csv_allow_single_quotes, 1, "If it is set to true, allow strings in single quotes.", 0) \ - M(SettingBool, format_csv_allow_double_quotes, 1, "If it is set to true, allow strings in double quotes.", 0) \ - M(SettingBool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ - M(SettingBool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \ \ - M(SettingDateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \ M(SettingBool, log_profile_events, true, "Log query performance statistics into the query_log and query_thread_log.", 0) \ M(SettingBool, log_query_settings, true, "Log query settings into the query_log.", 0) \ M(SettingBool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \ @@ -385,6 +332,7 @@ struct Settings : public SettingsCollection M(SettingBool, enable_debug_queries, false, "Enables debug queries such as AST.", 0) \ M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.", 0) \ M(SettingBool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ + M(SettingBool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \ M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. 
Currently supported only by 'mysql' and 'odbc' table functions.", 0) \ @@ -411,7 +359,7 @@ struct Settings : public SettingsCollection M(SettingBool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(SettingBool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ M(SettingUInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ - M(SettingBool, optimize_arithmetic_operations_in_agr_func, true, "Removing arithmetic operations from aggregation functions", 0) \ + M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ M(SettingBool, optimize_if_chain_to_miltiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \ M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ @@ -441,7 +389,70 @@ struct Settings : public SettingsCollection M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \ M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ - M(SettingBool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \ + \ + M(SettingBool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) + +#define FORMAT_FACTORY_SETTINGS(M) \ + M(SettingChar, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. 
If setting with a string, a string has to have a length of 1.", 0) \ + M(SettingBool, format_csv_allow_single_quotes, 1, "If it is set to true, allow strings in single quotes.", 0) \ + M(SettingBool, format_csv_allow_double_quotes, 1, "If it is set to true, allow strings in double quotes.", 0) \ + M(SettingBool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ + M(SettingBool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \ + M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \ + M(SettingBool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \ + M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ + M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \ + M(SettingBool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ + M(SettingBool, input_format_null_as_default, false, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ + \ + M(SettingDateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \ + \ + M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \ + M(SettingBool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ + M(SettingBool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ + M(SettingURI, format_avro_schema_registry_url, {}, "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ + \ + M(SettingBool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ + \ + M(SettingBool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \ + \ + M(SettingBool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. 
Don't confuse with backslashes that are always escaped.", 0) \ + \ + M(SettingUInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ + M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ + M(SettingUInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ + M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \ + M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ + M(SettingString, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ + M(SettingUInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ + M(SettingBool, output_format_tsv_crlf_end_of_line, false, "If it is set true, end of line in TSV format will be \\r\\n instead of \\n.", 0) \ + \ + M(SettingUInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \ + M(SettingFloat, input_format_allow_errors_ratio, 0, "Maximum relative amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \ + \ + M(SettingString, format_schema, "", "Schema identifier (used by schema-based formats)", 0) \ + M(SettingString, format_template_resultset, "", "Path to file which contains format string for result set (for Template format)", 0) \ + M(SettingString, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \ + M(SettingString, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \ + \ + M(SettingString, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \ + M(SettingString, format_custom_field_delimiter, "\t", "Delimiter between fields (for CustomSeparated format)", 0) \ + M(SettingString, format_custom_row_before_delimiter, "", "Delimiter before field of the first column (for CustomSeparated format)", 0) \ + M(SettingString, format_custom_row_after_delimiter, "\n", "Delimiter after field of the last column (for CustomSeparated format)", 0) \ + M(SettingString, format_custom_row_between_delimiter, "", "Delimiter between rows (for CustomSeparated format)", 0) \ + M(SettingString, format_custom_result_before_delimiter, "", "Prefix before result set (for CustomSeparated format)", 0) \ + M(SettingString, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \ + \ + M(SettingString, format_regexp, "", "Regular expression (for Regexp format)", 0) \ + M(SettingString, format_regexp_escaping_rule, "Escaped", "Field escaping rule (for Regexp format)", 0) \ + M(SettingBool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format", 0) \ + \ + M(SettingBool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \ + M(SettingBool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output 
formats.", 0) + + #define LIST_OF_SETTINGS(M) \ + COMMON_SETTINGS(M) \ + FORMAT_FACTORY_SETTINGS(M) DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index edf507f8a1d..4c90cc723bf 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -63,7 +63,7 @@ struct SortCursorImpl for (auto & column_desc : desc) { if (!column_desc.column_name.empty()) - throw Exception("SortDesctiption should contain column position if SortCursor was used without header.", + throw Exception("SortDescription should contain column position if SortCursor was used without header.", ErrorCodes::LOGICAL_ERROR); } reset(columns, {}); diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 6cc957cac55..86e4bb573ed 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -59,6 +59,13 @@ struct SortColumnDescription { return !(*this == other); } + + std::string dump() const + { + std::stringstream ss; + ss << column_name << ":" << column_number << ":dir " << direction << "nulls " << nulls_direction; + return ss.str(); + } }; /// Description of the sorting rule for several columns. diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 878ab0c4e37..8e075e5bf08 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index e39a7ccc94b..0db0e0218be 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -61,8 +61,8 @@ public: void cancel(); /// Get totals and extremes if any. - Block getTotals() const { return totals; } - Block getExtremes() const { return extremes; } + Block getTotals() { return std::move(totals); } + Block getExtremes() { return std::move(extremes); } /// Set callback for progress. It will be called on Progress packet. void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } diff --git a/src/DataStreams/tests/CMakeLists.txt b/src/DataStreams/tests/CMakeLists.txt index 95ef717c008..14db417b71c 100644 --- a/src/DataStreams/tests/CMakeLists.txt +++ b/src/DataStreams/tests/CMakeLists.txt @@ -1,13 +1,4 @@ set(SRCS) -add_executable (expression_stream expression_stream.cpp ${SRCS}) -target_link_libraries (expression_stream PRIVATE dbms clickhouse_storages_system clickhouse_parsers) - -add_executable (filter_stream filter_stream.cpp ${SRCS}) -target_link_libraries (filter_stream PRIVATE dbms clickhouse_storages_system clickhouse_parsers clickhouse_common_io) - -add_executable (union_stream2 union_stream2.cpp ${SRCS}) -target_link_libraries (union_stream2 PRIVATE dbms) - add_executable (finish_sorting_stream finish_sorting_stream.cpp ${SRCS}) target_link_libraries (finish_sorting_stream PRIVATE dbms) diff --git a/src/DataStreams/tests/expression_stream.cpp b/src/DataStreams/tests/expression_stream.cpp deleted file mode 100644 index 84b35cc2d3d..00000000000 --- a/src/DataStreams/tests/expression_stream.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include -#include - -#include -#include - -#include - -#include -#include -#include -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include - - -int main(int argc, char ** argv) -try -{ - using namespace DB; - - size_t n = argc == 2 ? 
parse(argv[1]) : 10ULL; - - std::string input = "SELECT number, number / 3, number * number"; - - ParserSelectQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); - - SharedContextHolder shared_context = Context::createShared(); - Context context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - - NamesAndTypesList source_columns = {{"number", std::make_shared()}}; - auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns); - SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context); - ExpressionActionsChain chain(context); - analyzer.appendSelect(chain, false); - analyzer.appendProjectResult(chain); - chain.finalize(); - ExpressionActionsPtr expression = chain.getLastActions(); - - StoragePtr table = StorageSystemNumbers::create(StorageID("test", "numbers"), false); - - Names column_names; - column_names.push_back("number"); - - QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); - - BlockInputStreamPtr in; - in = std::make_shared(std::move(table->read(column_names, {}, context, stage, 8192, 1)[0])); - in = std::make_shared(in, expression); - in = std::make_shared(in, 10, std::max(static_cast(0), static_cast(n) - 10)); - - WriteBufferFromOStream out1(std::cout); - BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out1, expression->getSampleBlock(), context); - - { - Stopwatch stopwatch; - stopwatch.start(); - - copyData(*in, *out); - - stopwatch.stop(); - std::cout << std::fixed << std::setprecision(2) - << "Elapsed " << stopwatch.elapsedSeconds() << " sec." - << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." - << std::endl; - } - - return 0; -} -catch (const DB::Exception & e) -{ - std::cerr << e.what() << ", " << e.displayText() << std::endl; - throw; -} - diff --git a/src/DataStreams/tests/filter_stream.cpp b/src/DataStreams/tests/filter_stream.cpp deleted file mode 100644 index 8c481e1f258..00000000000 --- a/src/DataStreams/tests/filter_stream.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - - -int main(int argc, char ** argv) -try -{ - using namespace DB; - - size_t n = argc == 2 ? 
parse(argv[1]) : 10ULL; - - std::string input = "SELECT number, number % 3 == 1"; - - ParserSelectQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); - - formatAST(*ast, std::cerr); - std::cerr << std::endl; - - SharedContextHolder shared_context = Context::createShared(); - Context context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - - NamesAndTypesList source_columns = {{"number", std::make_shared()}}; - auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns); - SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context); - ExpressionActionsChain chain(context); - analyzer.appendSelect(chain, false); - analyzer.appendProjectResult(chain); - chain.finalize(); - ExpressionActionsPtr expression = chain.getLastActions(); - - StoragePtr table = StorageSystemNumbers::create(StorageID("test", "numbers"), false); - - Names column_names; - column_names.push_back("number"); - - QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); - - BlockInputStreamPtr in = std::make_shared(std::move(table->read(column_names, {}, context, stage, 8192, 1)[0])); - in = std::make_shared(in, expression, "equals(modulo(number, 3), 1)"); - in = std::make_shared(in, 10, std::max(static_cast(0), static_cast(n) - 10)); - - WriteBufferFromOStream ob(std::cout); - BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", ob, expression->getSampleBlock(), context); - - { - Stopwatch stopwatch; - stopwatch.start(); - - copyData(*in, *out); - - stopwatch.stop(); - std::cout << std::fixed << std::setprecision(2) - << "Elapsed " << stopwatch.elapsedSeconds() << " sec." - << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." - << std::endl; - } - - return 0; -} -catch (const DB::Exception & e) -{ - std::cerr << e.what() << ", " << e.displayText() << std::endl; - throw; -} diff --git a/src/DataStreams/tests/union_stream2.cpp b/src/DataStreams/tests/union_stream2.cpp deleted file mode 100644 index 5b84d89a435..00000000000 --- a/src/DataStreams/tests/union_stream2.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - - -using namespace DB; - -int main(int, char **) -try -{ - SharedContextHolder shared_context = Context::createShared(); - Context context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - Settings settings = context.getSettings(); - - context.setPath("./"); - - loadMetadata(context); - - Names column_names; - column_names.push_back("WatchID"); - - StoragePtr table = DatabaseCatalog::instance().getTable({"default", "hits6"}, context); - - QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); - auto pipes = table->read(column_names, {}, context, stage, settings.max_block_size, settings.max_threads); - - BlockInputStreams streams(pipes.size()); - - for (size_t i = 0, size = streams.size(); i < size; ++i) - streams[i] = std::make_shared(std::make_shared(std::move(pipes[i]))); - - BlockInputStreamPtr stream = std::make_shared(streams, nullptr, settings.max_threads); - stream = std::make_shared(stream, 10, 0); - - WriteBufferFromFileDescriptor wb(STDERR_FILENO); - Block sample = table->getSampleBlock(); - BlockOutputStreamPtr out = context.getOutputFormat("TabSeparated", wb, sample); - - copyData(*stream, *out); - - return 0; -} -catch (const Exception & e) -{ - std::cerr << e.what() << ", " << 
e.displayText() << std::endl - << std::endl - << "Stack trace:" << std::endl - << e.getStackTraceString(); - return 1; -} diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 3fb380eac0f..59811b1cd55 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -36,25 +38,25 @@ namespace ErrorCodes std::string DataTypeAggregateFunction::doGetName() const { - std::stringstream stream; + WriteBufferFromOwnString stream; stream << "AggregateFunction(" << function->getName(); if (!parameters.empty()) { - stream << "("; + stream << '('; for (size_t i = 0; i < parameters.size(); ++i) { if (i) stream << ", "; stream << applyVisitor(DB::FieldVisitorToString(), parameters[i]); } - stream << ")"; + stream << ')'; } for (const auto & argument_type : argument_types) stream << ", " << argument_type->getName(); - stream << ")"; + stream << ')'; return stream.str(); } @@ -362,8 +364,11 @@ static DataTypePtr create(const ASTPtr & arguments) { const auto * literal = parameters[i]->as(); if (!literal) - throw Exception("Parameters to aggregate functions must be literals", - ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS); + throw Exception( + ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, + "Parameters to aggregate functions must be literals. " + "Got parameter '{}' for function '{}'", + parameters[i]->formatForErrorMessage(), function_name); params_row[i] = literal->value; } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index bf22845a5f6..2ddce184cce 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -82,8 +82,11 @@ static std::pair create(const ASTPtr & argum { const ASTLiteral * lit = parameters[i]->as(); if (!lit) - throw Exception("Parameters to aggregate functions must be literals", - ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS); + throw Exception( + ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, + "Parameters to aggregate functions must be literals. " + "Got parameter '{}' for function '{}'", + parameters[i]->formatForErrorMessage(), function_name); params_row[i] = lit->value; } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 1b542c7a1ff..1886d0fc555 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -294,7 +294,7 @@ void DatabaseOnDisk::renameTable( { attachTable(table_name, table, table_data_relative_path); /// Better diagnostics. 
- throw Exception{Exception::CreateFromPoco, e}; + throw Exception{Exception::CreateFromPocoTag{}, e}; } /// Now table data are moved to new database, so we must add metadata and attach table to new database diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 069a63aa9e1..6e5f984f435 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -10,28 +10,29 @@ add_library(clickhouse_functions ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC - clickhouse_dictionaries - clickhouse_dictionaries_embedded - dbms - consistent-hashing - consistent-hashing-sumbur + ${BASE64_LIBRARY} ${CITYHASH_LIBRARIES} ${FARMHASH_LIBRARIES} - ${METROHASH_LIBRARIES} - murmurhash - ${BASE64_LIBRARY} ${FASTOPS_LIBRARY} + clickhouse_dictionaries + clickhouse_dictionaries_embedded + consistent-hashing + consistent-hashing-sumbur + dbms + metrohash + murmurhash PRIVATE ${ZLIB_LIBRARIES} - ${Boost_FILESYSTEM_LIBRARY} + boost::filesystem + libdivide ) if (OPENSSL_CRYPTO_LIBRARY) target_link_libraries(clickhouse_functions PUBLIC ${OPENSSL_CRYPTO_LIBRARY}) endif() -target_include_directories(clickhouse_functions SYSTEM PRIVATE ${DIVIDE_INCLUDE_DIR} ${METROHASH_INCLUDE_DIR} ${SPARSEHASH_INCLUDE_DIR}) +target_include_directories(clickhouse_functions SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR}) if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size. @@ -60,20 +61,14 @@ if(USE_BASE64) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR}) endif() -if(USE_XXHASH) - target_link_libraries(clickhouse_functions PRIVATE ${XXHASH_LIBRARY}) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR}) -endif() +target_link_libraries(clickhouse_functions PRIVATE lz4) if (USE_H3) target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${H3_INCLUDE_DIR}) endif() -if(USE_HYPERSCAN) - target_link_libraries(clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY}) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR}) -endif() +target_link_libraries(clickhouse_functions PRIVATE hyperscan) if(USE_SIMDJSON) target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY}) diff --git a/src/Functions/GeoHash.cpp b/src/Functions/GeoHash.cpp index c6ac9939070..e59cf160ce1 100644 --- a/src/Functions/GeoHash.cpp +++ b/src/Functions/GeoHash.cpp @@ -260,10 +260,10 @@ void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * lo *latitude = decodeCoordinate(lat_encoded, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE)); } -GeohashesInBoxPreparedArgs geohashesInBoxPrepare(const Float64 longitude_min, - const Float64 latitude_min, - const Float64 longitude_max, - const Float64 latitude_max, +GeohashesInBoxPreparedArgs geohashesInBoxPrepare(Float64 longitude_min, + Float64 latitude_min, + Float64 longitude_max, + Float64 latitude_max, uint8_t precision) { precision = geohashPrecision(precision); @@ -273,6 +273,11 @@ GeohashesInBoxPreparedArgs geohashesInBoxPrepare(const Float64 longitude_min, return {}; } + longitude_min = std::max(longitude_min, LON_MIN); + longitude_max = std::min(longitude_max, LON_MAX); + latitude_min = std::max(latitude_min, LAT_MIN); + latitude_max = 
std::min(latitude_max, LAT_MAX); + const auto lon_step = getSpan(precision, LONGITUDE); const auto lat_step = getSpan(precision, LATITUDE); diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 25c2da9915c..6e1b03a47bd 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -358,6 +358,9 @@ bool PointInPolygonWithGrid::contains(CoordinateType x, Coordina if (has_empty_bound) return false; + if (std::isnan(x) || std::isnan(y)) + return false; + CoordinateType float_row = (y + y_shift) * y_scale; CoordinateType float_col = (x + x_shift) * x_scale; diff --git a/src/Functions/URL/CMakeLists.txt b/src/Functions/URL/CMakeLists.txt index fabfccae230..21f0adb6594 100644 --- a/src/Functions/URL/CMakeLists.txt +++ b/src/Functions/URL/CMakeLists.txt @@ -9,10 +9,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW endif () # TODO: move Functions/Regexps.h to some lib and use here -if(USE_HYPERSCAN) - target_link_libraries(clickhouse_functions_url PRIVATE ${HYPERSCAN_LIBRARY}) - target_include_directories(clickhouse_functions_url SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR}) -endif() +target_link_libraries(clickhouse_functions_url PRIVATE hyperscan) if (USE_GPERF) # Only for regenerate diff --git a/src/Functions/URL/FunctionsURL.h b/src/Functions/URL/FunctionsURL.h index fa5e9246488..297b62ca256 100644 --- a/src/Functions/URL/FunctionsURL.h +++ b/src/Functions/URL/FunctionsURL.h @@ -21,6 +21,7 @@ namespace DB * queryString * fragment * queryStringAndFragment + * netloc * * Functions, removing parts from URL. * If URL has nothing like, then it is returned unchanged. diff --git a/src/Functions/URL/netloc.cpp b/src/Functions/URL/netloc.cpp new file mode 100644 index 00000000000..d8858c3364a --- /dev/null +++ b/src/Functions/URL/netloc.cpp @@ -0,0 +1,17 @@ +#include +#include +#include "netloc.h" + +namespace DB +{ + +struct NameNetloc { static constexpr auto name = "netloc"; }; +using FunctionNetloc = FunctionStringToString, NameNetloc>; + +void registerFunctionNetloc(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/URL/netloc.h b/src/Functions/URL/netloc.h new file mode 100644 index 00000000000..443ef7f9003 --- /dev/null +++ b/src/Functions/URL/netloc.h @@ -0,0 +1,129 @@ +#pragma once + +#include "FunctionsURL.h" +#include +#include "protocol.h" +#include +#include + + +namespace DB +{ + +struct ExtractNetloc +{ + /// We use the same as domain function + static size_t getReserveLengthForElement() { return 15; } + + static inline StringRef getNetworkLocation(const char * data, size_t size) + { + Pos pos = data; + Pos end = data + size; + + if (*pos == '/' && *(pos + 1) == '/') + { + pos += 2; + } + else + { + Pos scheme_end = data + std::min(size, 16UL); + for (++pos; pos < scheme_end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch (*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + default: + goto exloop; + } + } + } +exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; + } + + bool has_identification = false; + Pos question_mark_pos = end; + Pos slash_pos = end; + auto start_of_host = pos; + for (; pos < end; ++pos) + { + switch (*pos) + { + case '/': + if 
(has_identification) + return StringRef(start_of_host, pos - start_of_host); + else + slash_pos = pos; + break; + case '?': + if (has_identification) + return StringRef(start_of_host, pos - start_of_host); + else + question_mark_pos = pos; + break; + case '#': + return StringRef(start_of_host, pos - start_of_host); + case '@': /// foo:bar@example.ru + has_identification = true; + break; + case ' ': /// restricted symbols in whole URL + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host); + } + } + + if (has_identification) + return StringRef(start_of_host, pos - start_of_host); + else + return StringRef(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host); + } + + static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) + { + StringRef host = getNetworkLocation(data, size); + + res_data = host.data; + res_size = host.size; + } +}; + +} + diff --git a/src/Functions/URL/registerFunctionsURL.cpp b/src/Functions/URL/registerFunctionsURL.cpp index 9ba5261f728..f3906c2723e 100644 --- a/src/Functions/URL/registerFunctionsURL.cpp +++ b/src/Functions/URL/registerFunctionsURL.cpp @@ -26,6 +26,7 @@ void registerFunctionCutFragment(FunctionFactory & factory); void registerFunctionCutQueryStringAndFragment(FunctionFactory & factory); void registerFunctionCutURLParameter(FunctionFactory & factory); void registerFunctionDecodeURLComponent(FunctionFactory & factory); +void registerFunctionNetloc(FunctionFactory & factory); void registerFunctionsURL(FunctionFactory & factory) { @@ -52,6 +53,7 @@ void registerFunctionsURL(FunctionFactory & factory) registerFunctionCutQueryStringAndFragment(factory); registerFunctionCutURLParameter(factory); registerFunctionDecodeURLComponent(factory); + registerFunctionNetloc(factory); } } diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 2c55e39506c..561caf316b2 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -7,7 +7,7 @@ namespace DB { namespace ErrorCodes { - extern const int BAD_CAST; + extern const int BAD_ARGUMENTS; } /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). @@ -23,8 +23,10 @@ namespace DB template static inline Result apply(A left, B right) { + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. if constexpr (!std::is_same_v || !std::is_same_v) - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskAnd.", ErrorCodes::BAD_CAST); + throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskAnd.", ErrorCodes::BAD_ARGUMENTS); return static_cast( ((static_cast(left) & static_cast(right)) & 1) | ((((static_cast(left) >> 1) | (static_cast(right) >> 1)) & 1) << 1)); diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index 0b439165fca..a23be509f1a 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -7,7 +7,7 @@ namespace DB { namespace ErrorCodes { - extern const int BAD_CAST; + extern const int BAD_ARGUMENTS; } /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). 
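A quick aside on the two-bit encoding referenced in the comment above and shared by __bitBoolMaskAnd (earlier in this section), __bitBoolMaskOr (next hunk), __bitSwapLastTwo and __bitWrapperFunc: bit 0 means "can be true", bit 1 means "can be false". The following is a small standalone sketch mirroring the formulas visible in those functions, not ClickHouse code itself:

#include <cassert>
#include <cstdint>

// Encode a scalar into the two-bit mask: 0b01 = "can be true", 0b10 = "can be false".
static uint8_t wrap(uint8_t a) { return a == 0 ? 0b10 : 0b01; }

// AND of two masks: "can be true" needs both sides, "can be false" needs either side.
static uint8_t maskAnd(uint8_t left, uint8_t right)
{
    return static_cast<uint8_t>(((left & right) & 1) | ((((left >> 1) | (right >> 1)) & 1) << 1));
}

// OR of two masks: "can be true" needs either side, "can be false" needs both sides.
static uint8_t maskOr(uint8_t left, uint8_t right)
{
    return static_cast<uint8_t>(((left | right) & 1) | ((((left >> 1) & (right >> 1)) & 1) << 1));
}

// Negation simply swaps the two bits, as __bitSwapLastTwo does.
static uint8_t maskNot(uint8_t a) { return static_cast<uint8_t>(((a & 1) << 1) | ((a >> 1) & 1)); }

int main()
{
    uint8_t t = wrap(1);   // 0b01
    uint8_t f = wrap(0);   // 0b10
    assert(maskAnd(t, f) == 0b10);   // true AND false -> can only be false
    assert(maskOr(t, f) == 0b01);    // true OR false  -> can only be true
    assert(maskNot(t) == f);
    return 0;
}

Presumably this encoding lets index analysis propagate "could be either" (both bits set) through boolean expressions instead of forcing a definite true or false; that interpretation is an inference from BoolMask.h, not stated in this diff.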
@@ -24,7 +24,9 @@ namespace DB static inline Result apply(A left, B right) { if constexpr (!std::is_same_v || !std::is_same_v) - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskOr.", ErrorCodes::BAD_CAST); + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. + throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskOr.", ErrorCodes::BAD_ARGUMENTS); return static_cast( ((static_cast(left) | static_cast(right)) & 1) | ((((static_cast(left) >> 1) & (static_cast(right) >> 1)) & 1) << 1)); diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index d6fa9a39ec3..9d942494258 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -7,7 +7,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_CAST; + extern const int BAD_ARGUMENTS; } /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). @@ -21,7 +21,9 @@ namespace DB static inline ResultType NO_SANITIZE_UNDEFINED apply(A a) { if constexpr (!std::is_same_v) - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitSwapLastTwo.", ErrorCodes::BAD_CAST); + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. + throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitSwapLastTwo.", ErrorCodes::BAD_ARGUMENTS); return static_cast( ((static_cast(a) & 1) << 1) | ((static_cast(a) >> 1) & 1)); } diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 9f7276fbf98..d2d4b45781b 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -6,7 +6,7 @@ namespace DB { namespace ErrorCodes { - extern const int BAD_CAST; + extern const int BAD_ARGUMENTS; } /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). @@ -20,8 +20,10 @@ namespace DB static inline ResultType NO_SANITIZE_UNDEFINED apply(A a) { + // Should be a logical error, but this function is callable from SQL. + // Need to investigate this. if constexpr (!is_integral_v) - throw DB::Exception("It's a bug! Only integer types are supported by __bitWrapperFunc.", ErrorCodes::BAD_CAST); + throw DB::Exception("It's a bug! Only integer types are supported by __bitWrapperFunc.", ErrorCodes::BAD_ARGUMENTS); return a == 0 ? static_cast(0b10) : static_cast(0b1); } diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in index 46664caaa3f..eb96c13c355 100644 --- a/src/Functions/config_functions.h.in +++ b/src/Functions/config_functions.h.in @@ -3,8 +3,6 @@ // .h autogenerated by cmake! #cmakedefine01 USE_BASE64 -#cmakedefine01 USE_XXHASH -#cmakedefine01 USE_HYPERSCAN #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON #cmakedefine01 USE_H3 diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index 289e94b1c45..6bf0e5a82cd 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -120,7 +120,7 @@ public: // Actually write geohashes into preallocated buffer. 
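The loop-counter fix just below in geohashesInBox.cpp (UInt8 to UInt64) matters because prepared_args.items_count can easily exceed 255 cells for a large box or high precision. A minimal standalone demonstration of the failure mode, with an assumed cell count of 1024:

#include <cstdint>
#include <iostream>

int main()
{
    const uint64_t items_count = 1024;   // assumed: a box covering 1024 geohash cells

    // With the fixed UInt64 counter the loop writes one offset per cell and stops.
    uint64_t offsets_written = 0;
    for (uint64_t i = 1; i <= items_count; ++i)
        ++offsets_written;
    std::cout << offsets_written << " offsets written\n";   // prints 1024

    // With the old UInt8 counter, i wraps 255 -> 0, so `i <= 1024` stays true forever
    // and the same loop would never terminate while it keeps pushing offsets.
    return 0;
}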
geohashesInBox(prepared_args, out); - for (UInt8 i = 1; i <= prepared_args.items_count ; ++i) + for (UInt64 i = 1; i <= prepared_args.items_count ; ++i) { res_strings_offsets.push_back(starting_offset + (prepared_args.precision + 1) * i); } diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 31491dec02c..dd1c0075ceb 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -6,7 +6,7 @@ CFLAGS( ) ADDINCL( - library/consistent_hashing + library/cpp/consistent_hashing contrib/libs/farmhash contrib/libs/hyperscan/src contrib/libs/icu/common @@ -26,7 +26,7 @@ PEERDIR( contrib/libs/metrohash contrib/libs/rapidjson contrib/libs/xxhash - library/consistent_hashing + library/cpp/consistent_hashing ) # "Arcadia" build is slightly deficient. It lacks many libraries that we need. @@ -423,6 +423,7 @@ SRCS( URL/extractURLParameters.cpp URL/firstSignificantSubdomain.cpp URL/fragment.cpp + URL/netloc.cpp URL/path.cpp URL/pathFull.cpp URL/port.cpp diff --git a/src/Functions/ya.make.in b/src/Functions/ya.make.in index 42ae1c6dde8..2f01b20ca5f 100644 --- a/src/Functions/ya.make.in +++ b/src/Functions/ya.make.in @@ -5,7 +5,7 @@ CFLAGS( ) ADDINCL( - library/consistent_hashing + library/cpp/consistent_hashing contrib/libs/farmhash contrib/libs/hyperscan/src contrib/libs/icu/common @@ -25,7 +25,7 @@ PEERDIR( contrib/libs/metrohash contrib/libs/rapidjson contrib/libs/xxhash - library/consistent_hashing + library/cpp/consistent_hashing ) # "Arcadia" build is slightly deficient. It lacks many libraries that we need. diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 1539b3c7025..2c75a137222 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -4,6 +4,7 @@ # include # include +# include # include # include @@ -60,6 +61,47 @@ public: private: Poco::Logger * log = &Poco::Logger::get("AWSClient"); }; + +class S3AuthSigner : public Aws::Client::AWSAuthV4Signer +{ +public: + S3AuthSigner( + const Aws::Client::ClientConfiguration & client_configuration, + const Aws::Auth::AWSCredentials & credentials, + const DB::HeaderCollection & headers_) + : Aws::Client::AWSAuthV4Signer( + std::make_shared(credentials), + "s3", + client_configuration.region, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false) + , headers(headers_) + { + } + + bool SignRequest(Aws::Http::HttpRequest & request, const char * region, bool sign_body) const override + { + auto result = Aws::Client::AWSAuthV4Signer::SignRequest(request, region, sign_body); + for (const auto & header : headers) + request.SetHeaderValue(header.name, header.value); + return result; + } + + bool PresignRequest( + Aws::Http::HttpRequest & request, + const char * region, + const char * serviceName, + long long expiration_time_sec) const override // NOLINT + { + auto result = Aws::Client::AWSAuthV4Signer::PresignRequest(request, region, serviceName, expiration_time_sec); + for (const auto & header : headers) + request.SetHeaderValue(header.name, header.value); + return result; + } + +private: + const DB::HeaderCollection headers; +}; } namespace DB @@ -139,6 +181,25 @@ namespace S3 ); } + std::shared_ptr ClientFactory::create( // NOLINT + const String & endpoint, + bool is_virtual_hosted_style, + const String & access_key_id, + const String & secret_access_key, + HeaderCollection headers) + { + Aws::Client::ClientConfiguration cfg; + if (!endpoint.empty()) + cfg.endpointOverride = endpoint; + + Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key); + return std::make_shared( + std::make_shared(cfg, 
std::move(credentials), std::move(headers)), + std::move(cfg), // Client configuration. + is_virtual_hosted_style || cfg.endpointOverride.empty() // Use virtual addressing only if endpoint is not specified. + ); + } + URI::URI(const Poco::URI & uri_) { /// Case when bucket name represented in domain name of S3 URL. diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 84795a4b39a..7f8cba66aad 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -5,7 +5,7 @@ #if USE_AWS_S3 #include -#include +#include #include namespace Aws::S3 @@ -13,6 +13,12 @@ namespace Aws::S3 class S3Client; } +namespace DB +{ + struct HttpHeader; + using HeaderCollection = std::vector; +} + namespace DB::S3 { @@ -34,6 +40,14 @@ public: bool is_virtual_hosted_style, const String & access_key_id, const String & secret_access_key); + + std::shared_ptr create( + const String & endpoint, + bool is_virtual_hosted_style, + const String & access_key_id, + const String & secret_access_key, + HeaderCollection headers); + private: ClientFactory(); diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 814caeaffd3..aec6c779394 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -20,18 +20,42 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ("https://s3.yandexcloud.net", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); + ASSERT_EQ(true, uri.is_virtual_hosted_style); } { S3::URI uri(Poco::URI("https://storage.yandexcloud.net/jokserfn/data")); ASSERT_EQ("https://storage.yandexcloud.net", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); + ASSERT_EQ(false, uri.is_virtual_hosted_style); + } + { + S3::URI uri(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data")); + ASSERT_EQ("https://s3.us-east-2.amazonaws.com", uri.endpoint); + ASSERT_EQ("bucketname", uri.bucket); + ASSERT_EQ("data", uri.key); + ASSERT_EQ(true, uri.is_virtual_hosted_style); } { S3::URI uri(Poco::URI("https://s3.us-east-2.amazonaws.com/bucketname/data")); ASSERT_EQ("https://s3.us-east-2.amazonaws.com", uri.endpoint); ASSERT_EQ("bucketname", uri.bucket); ASSERT_EQ("data", uri.key); + ASSERT_EQ(false, uri.is_virtual_hosted_style); + } + { + S3::URI uri(Poco::URI("https://bucketname.s3-us-east-2.amazonaws.com/data")); + ASSERT_EQ("https://s3-us-east-2.amazonaws.com", uri.endpoint); + ASSERT_EQ("bucketname", uri.bucket); + ASSERT_EQ("data", uri.key); + ASSERT_EQ(true, uri.is_virtual_hosted_style); + } + { + S3::URI uri(Poco::URI("https://s3-us-east-2.amazonaws.com/bucketname/data")); + ASSERT_EQ("https://s3-us-east-2.amazonaws.com", uri.endpoint); + ASSERT_EQ("bucketname", uri.bucket); + ASSERT_EQ("data", uri.key); + ASSERT_EQ(false, uri.is_virtual_hosted_style); } } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b5d9f30573e..512319375d5 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -512,7 +512,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (data.only_consts) arguments_present = false; else - throw Exception("Unknown identifier: " + child_column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.getSampleBlock().dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); } } } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 96a9b1fc1df..538a24fa997 100644 --- a/src/Interpreters/Aggregator.cpp +++ 
b/src/Interpreters/Aggregator.cpp @@ -530,63 +530,33 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( } -bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) +void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl( + AggregatedDataWithoutKey & res, + size_t row_begin, + size_t row_end, + AggregateFunctionInstruction * aggregate_instructions, + Arena * arena) { - UInt64 num_rows = block.rows(); - return executeOnBlock(block.getColumns(), num_rows, result, key_columns, aggregate_columns, no_more_keys); + /// Adding values + for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) + { + if (inst->offsets) + inst->batch_that->addBatchSinglePlaceFromInterval(inst->offsets[row_begin], inst->offsets[row_end - 1], res + inst->state_offset, inst->batch_arguments, arena); + else + inst->batch_that->addBatchSinglePlaceFromInterval(row_begin, row_end, res + inst->state_offset, inst->batch_arguments, arena); + } } -bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) + +void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns & aggregate_columns, Columns & materialized_columns, + AggregateFunctionInstructions & aggregate_functions_instructions, NestedColumnsHolder & nested_columns_holder) { - if (isCancelled()) - return true; - - /// `result` will destroy the states of aggregate functions in the destructor - result.aggregator = this; - - /// How to perform the aggregation? - if (result.empty()) - { - result.init(method_chosen); - result.keys_size = params.keys_size; - result.key_sizes = key_sizes; - LOG_TRACE(log, "Aggregation method: {}", result.getMethodName()); - } - - if (isCancelled()) - return true; - for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_columns[i].resize(params.aggregates[i].arguments.size()); - /** Constant columns are not supported directly during aggregation. - * To make them work anyway, we materialize them. 
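Stepping back from this hunk for a moment: the new executeOnIntervalWithoutKeyImpl above folds only a sub-range of rows into one pre-allocated state, presumably in support of the in-order aggregation wired up elsewhere in this change. A toy standalone sketch of that interval idea (plain vectors instead of ClickHouse columns and arenas; the half-open range convention here is an assumption):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct SumState { int64_t value = 0; };

struct ToySumFunction
{
    // Toy counterpart of addBatchSinglePlaceFromInterval: fold rows [row_begin, row_end)
    // of one column into a single aggregation state.
    void addBatchSinglePlaceFromInterval(size_t row_begin, size_t row_end,
                                         SumState & state, const std::vector<int64_t> & column) const
    {
        for (size_t i = row_begin; i < row_end; ++i)
            state.value += column[i];
    }
};

int main()
{
    std::vector<int64_t> column{1, 2, 3, 4, 5, 6};
    SumState state;
    ToySumFunction{}.addBatchSinglePlaceFromInterval(2, 5, state, column);  // rows 2, 3, 4
    std::cout << state.value << "\n";  // 3 + 4 + 5 = 12
}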
- */ - Columns materialized_columns; - - /// Remember the columns we will work with - for (size_t i = 0; i < params.keys_size; ++i) - { - materialized_columns.push_back(columns.at(params.keys[i])->convertToFullColumnIfConst()); - key_columns[i] = materialized_columns.back().get(); - - if (!result.isLowCardinality()) - { - auto column_no_lc = recursiveRemoveLowCardinality(key_columns[i]->getPtr()); - if (column_no_lc.get() != key_columns[i]) - { - materialized_columns.emplace_back(std::move(column_no_lc)); - key_columns[i] = materialized_columns.back().get(); - } - } - } - - AggregateFunctionInstructions aggregate_functions_instructions(params.aggregates_size + 1); + aggregate_functions_instructions.resize(params.aggregates_size + 1); aggregate_functions_instructions[params.aggregates_size].that = nullptr; - std::vector> nested_columns_holder; for (size_t i = 0; i < params.aggregates_size; ++i) { for (size_t j = 0; j < aggregate_columns[i].size(); ++j) @@ -627,6 +597,62 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData aggregate_functions_instructions[i].batch_that = that; } +} + + +bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & result, + ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) +{ + UInt64 num_rows = block.rows(); + return executeOnBlock(block.getColumns(), num_rows, result, key_columns, aggregate_columns, no_more_keys); +} + + +bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result, + ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) +{ + if (isCancelled()) + return true; + + /// `result` will destroy the states of aggregate functions in the destructor + result.aggregator = this; + + /// How to perform the aggregation? + if (result.empty()) + { + result.init(method_chosen); + result.keys_size = params.keys_size; + result.key_sizes = key_sizes; + LOG_TRACE(log, "Aggregation method: {}", result.getMethodName()); + } + + if (isCancelled()) + return true; + + /** Constant columns are not supported directly during aggregation. + * To make them work anyway, we materialize them. + */ + Columns materialized_columns; + + /// Remember the columns we will work with + for (size_t i = 0; i < params.keys_size; ++i) + { + materialized_columns.push_back(columns.at(params.keys[i])->convertToFullColumnIfConst()); + key_columns[i] = materialized_columns.back().get(); + + if (!result.isLowCardinality()) + { + auto column_no_lc = recursiveRemoveLowCardinality(key_columns[i]->getPtr()); + if (column_no_lc.get() != key_columns[i]) + { + materialized_columns.emplace_back(std::move(column_no_lc)); + key_columns[i] = materialized_columns.back().get(); + } + } + } + NestedColumnsHolder nested_columns_holder; + AggregateFunctionInstructions aggregate_functions_instructions; + prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_functions_instructions, nested_columns_holder); if (isCancelled()) return true; @@ -666,7 +692,8 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData if (auto * memory_tracker = memory_tracker_child->getParent()) current_memory_usage = memory_tracker->get(); - auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation; /// Here all the results in the sum are taken into account, from different threads. + /// Here all the results in the sum are taken into account, from different threads. 
+ auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation; bool worth_convert_to_two_level = (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold) @@ -972,6 +999,73 @@ void Aggregator::convertToBlockImpl( data.clearAndShrink(); } + +template +inline void Aggregator::insertAggregatesIntoColumns( + Mapped & mapped, + MutableColumns & final_aggregate_columns) const +{ + /** Final values of aggregate functions are inserted into columns. + * Then the states of aggregate functions, which are no longer needed, are destroyed. + * + * We mark already destroyed states with "nullptr" in data, + * so they will not be destroyed in the destructor of Aggregator + * (other values will be destroyed in the destructor in case of an exception). + * + * But it becomes tricky, because we have multiple aggregate states pointed to by a single pointer in data. + * So, if an exception is thrown in the middle of moving states for different aggregate functions, + * we have to catch exceptions and destroy all the states that are no longer needed, + * to keep the data in a consistent state. + * + * It is also tricky, because there are aggregate functions with "-State" modifier. + * When we call "insertResultInto" for them, they insert a pointer to the state into ColumnAggregateFunction + * and ColumnAggregateFunction will take ownership of this state. + * So, for aggregate functions with "-State" modifier, the state must not be destroyed + * after it has been transferred to ColumnAggregateFunction. + * But we should mark that the data no longer owns these states. + */ + + size_t insert_i = 0; + std::exception_ptr exception; + + try + { + /// Insert final values of aggregate functions into columns. + for (; insert_i < params.aggregates_size; ++insert_i) + aggregate_functions[insert_i]->insertResultInto( + mapped + offsets_of_aggregate_states[insert_i], + *final_aggregate_columns[insert_i]); + } + catch (...) + { + exception = std::current_exception(); + } + + /** Destroy states that are no longer needed. This loop does not throw. + * + * Don't destroy states for "-State" aggregate functions, + * because the ownership of this state is transferred to ColumnAggregateFunction + * and ColumnAggregateFunction will take care of it. + * + * But this only applies to states that have been transferred to ColumnAggregateFunction + * before the exception was thrown. + */ + for (size_t destroy_i = 0; destroy_i < params.aggregates_size; ++destroy_i) + { + /// If ownership was not transferred to ColumnAggregateFunction. + if (!(destroy_i < insert_i && aggregate_functions[destroy_i]->isState())) + aggregate_functions[destroy_i]->destroy( + mapped + offsets_of_aggregate_states[destroy_i]); + } + + /// Mark the cell as destroyed so it will not be destroyed in the destructor. 
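The comment block above boils down to a destroy-what-we-still-own pattern: emit as many results as possible, remember how far emission got, destroy only the states whose ownership was not handed over, then rethrow. A condensed standalone sketch of the same pattern with generic callables instead of aggregate function states (names here are illustrative, not ClickHouse APIs):

#include <cstddef>
#include <exception>
#include <functional>
#include <stdexcept>
#include <vector>

// emit[i] writes result i into its output; destroy[i] frees state i;
// transfers_ownership[i] is true for "-State"-like results whose state now
// belongs to the output column. All three vectors have the same length.
void finalizeStates(const std::vector<std::function<void()>> & emit,
                    const std::vector<std::function<void()>> & destroy,
                    const std::vector<bool> & transfers_ownership)
{
    size_t emitted = 0;
    std::exception_ptr error;

    try
    {
        for (; emitted < emit.size(); ++emitted)
            emit[emitted]();
    }
    catch (...)
    {
        error = std::current_exception();   // remember it, destroy leftovers first
    }

    for (size_t i = 0; i < destroy.size(); ++i)
        if (!(i < emitted && transfers_ownership[i]))   // skip states we no longer own
            destroy[i]();

    if (error)
        std::rethrow_exception(error);
}

int main()
{
    int destroyed = 0;
    std::vector<std::function<void()>> emit = { []{}, []{ throw std::runtime_error("emit failed"); }, []{} };
    std::vector<std::function<void()>> destroy = { [&]{ ++destroyed; }, [&]{ ++destroyed; }, [&]{ ++destroyed; } };
    std::vector<bool> transfers_ownership = { true, false, false };

    try { finalizeStates(emit, destroy, transfers_ownership); }
    catch (const std::exception &) { /* the original error resurfaces here */ }

    // State 0 was emitted and handed over, so only states 1 and 2 are destroyed.
    return destroyed == 2 ? 0 : 1;
}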
+ mapped = nullptr; + + if (exception) + std::rethrow_exception(exception); +} + + template void NO_INLINE Aggregator::convertToBlockImplFinal( Method & method, @@ -984,25 +1078,15 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( if (data.hasNullKeyData()) { key_columns[0]->insertDefault(); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->insertResultInto( - data.getNullKeyData() + offsets_of_aggregate_states[i], - *final_aggregate_columns[i]); + insertAggregatesIntoColumns(data.getNullKeyData(), final_aggregate_columns); } } data.forEachValue([&](const auto & key, auto & mapped) { method.insertKeyIntoColumns(key, key_columns, key_sizes); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->insertResultInto( - mapped + offsets_of_aggregate_states[i], - *final_aggregate_columns[i]); + insertAggregatesIntoColumns(mapped, final_aggregate_columns); }); - - destroyImpl(data); } template @@ -1020,6 +1104,8 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_columns[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]); + + data.getNullKeyData() = nullptr; } } @@ -1112,7 +1198,39 @@ Block Aggregator::prepareBlockAndFill( return res; } +void Aggregator::fillAggregateColumnsWithSingleKey( + AggregatedDataVariants & data_variants, + MutableColumns & final_aggregate_columns) +{ + AggregatedDataWithoutKey & data = data_variants.without_key; + for (size_t i = 0; i < params.aggregates_size; ++i) + { + ColumnAggregateFunction & column_aggregate_func = assert_cast(*final_aggregate_columns[i]); + for (auto & pool : data_variants.aggregates_pools) + { + column_aggregate_func.addArena(pool); + } + column_aggregate_func.getData().push_back(data + offsets_of_aggregate_states[i]); + } + data = nullptr; +} + +void Aggregator::createStatesAndFillKeyColumnsWithSingleKey( + AggregatedDataVariants & data_variants, + Columns & key_columns, + size_t key_row, + MutableColumns & final_key_columns) +{ + AggregateDataPtr place = data_variants.aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(place); + data_variants.without_key = place; + + for (size_t i = 0; i < params.keys_size; ++i) + { + final_key_columns[i]->insertFrom(*key_columns[i].get(), key_row); + } +} Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const { @@ -1128,16 +1246,16 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va { AggregatedDataWithoutKey & data = data_variants.without_key; - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!final_) - aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]); - else - aggregate_functions[i]->insertResultInto(data + offsets_of_aggregate_states[i], *final_aggregate_columns[i]); - } - if (!final_) + { + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]); data = nullptr; + } + else + { + insertAggregatesIntoColumns(data, final_aggregate_columns); + } if (params.overflow_row) for (size_t i = 0; i < params.keys_size; ++i) @@ -2328,8 +2446,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const return; for (size_t i = 0; i < params.aggregates_size; ++i) - if (!aggregate_functions[i]->isState()) - aggregate_functions[i]->destroy(data + offsets_of_aggregate_states[i]); + 
aggregate_functions[i]->destroy(data + offsets_of_aggregate_states[i]); data = nullptr; }); @@ -2343,8 +2460,7 @@ void Aggregator::destroyWithoutKey(AggregatedDataVariants & result) const if (nullptr != res_data) { for (size_t i = 0; i < params.aggregates_size; ++i) - if (!aggregate_functions[i]->isState()) - aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]); + aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]); res_data = nullptr; } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 45addb976f1..6d0eeee9014 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1002,6 +1002,7 @@ protected: friend class MergingAndConvertingBlockInputStream; friend class ConvertingAggregatedToChunksTransform; friend class ConvertingAggregatedToChunksSource; + friend class AggregatingInOrderTransform; Params params; @@ -1033,12 +1034,13 @@ protected: }; using AggregateFunctionInstructions = std::vector; + using NestedColumnsHolder = std::vector>; Sizes offsets_of_aggregate_states; /// The offset to the n-th aggregate function in a row of aggregate functions. size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions. // add info to track alignment requirement - // If there are states whose alignmentment are v1, ..vn, align_aggregate_states will be max(v1, ... vn) + // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn) size_t align_aggregate_states = 1; bool all_aggregates_has_trivial_destructor = false; @@ -1105,6 +1107,13 @@ protected: AggregateFunctionInstruction * aggregate_instructions, Arena * arena); + static void executeOnIntervalWithoutKeyImpl( + AggregatedDataWithoutKey & res, + size_t row_begin, + size_t row_end, + AggregateFunctionInstruction * aggregate_instructions, + Arena * arena); + template void writeToTemporaryFileImpl( AggregatedDataVariants & data_variants, @@ -1157,6 +1166,11 @@ protected: MutableColumns & final_aggregate_columns, bool final) const; + template + void insertAggregatesIntoColumns( + Mapped & mapped, + MutableColumns & final_aggregate_columns) const; + template void convertToBlockImplFinal( Method & method, @@ -1250,6 +1264,22 @@ protected: * - sets the variable no_more_keys to true. 
*/ bool checkLimits(size_t result_size, bool & no_more_keys) const; + + void prepareAggregateInstructions( + Columns columns, + AggregateColumns & aggregate_columns, + Columns & materialized_columns, + AggregateFunctionInstructions & instructions, + NestedColumnsHolder & nested_columns_holder); + + void fillAggregateColumnsWithSingleKey( + AggregatedDataVariants & data_variants, + MutableColumns & final_aggregate_columns); + + void createStatesAndFillKeyColumnsWithSingleKey( + AggregatedDataVariants & data_variants, + Columns & key_columns, size_t key_row, + MutableColumns & final_key_columns); }; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 5d41b0e87ce..bfa6fae0977 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include #include @@ -13,9 +11,8 @@ #include #include #include -#include -#include -#include +#include +#include namespace ProfileEvents { @@ -118,13 +115,13 @@ void SelectStreamFactory::createForShard( const SelectQueryInfo &, Pipes & res) { - bool force_add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - bool add_totals_port = false; - bool add_extremes_port = false; + bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + bool add_totals = false; + bool add_extremes = false; if (processed_stage == QueryProcessingStage::Complete) { - add_totals_port = query_ast->as().group_by_with_totals; - add_extremes_port = context.getSettingsRef().extremes; + add_totals = query_ast->as().group_by_with_totals; + add_extremes = context.getSettingsRef().extremes; } auto modified_query_ast = query_ast->clone(); @@ -140,20 +137,13 @@ void SelectStreamFactory::createForShard( auto emplace_remote_stream = [&]() { - auto stream = std::make_shared( + auto remote_query_executor = std::make_shared( shard_info.pool, modified_query, header, context, nullptr, throttler, scalars, external_tables, processed_stage); - stream->setPoolMode(PoolMode::GET_MANY); + remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) - stream->setMainTable(main_table); + remote_query_executor->setMainTable(main_table); - auto source = std::make_shared(std::move(stream), force_add_agg_info); - - if (add_totals_port) - source->addTotalsPort(); - if (add_extremes_port) - source->addExtremesPort(); - - res.emplace_back(std::move(source)); + res.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes)); }; const auto & settings = context.getSettingsRef(); @@ -246,8 +236,8 @@ void SelectStreamFactory::createForShard( auto lazily_create_stream = [ pool = shard_info.pool, shard_num = shard_info.shard_num, modified_query, header = header, modified_query_ast, context, throttler, main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, - stage = processed_stage, local_delay]() - -> BlockInputStreamPtr + stage = processed_stage, local_delay, add_agg_info, add_totals, add_extremes]() + -> Pipe { auto current_settings = context.getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover( @@ -277,8 +267,7 @@ void SelectStreamFactory::createForShard( } if (try_results.empty() || local_delay < max_remote_delay) - return std::make_shared( - createLocalStream(modified_query_ast, header, context, stage)); + return 
createLocalStream(modified_query_ast, header, context, stage).getPipe(); else { std::vector connections; @@ -286,20 +275,14 @@ void SelectStreamFactory::createForShard( for (auto & try_result : try_results) connections.emplace_back(std::move(try_result.entry)); - return std::make_shared( + auto remote_query_executor = std::make_shared( std::move(connections), modified_query, header, context, nullptr, throttler, scalars, external_tables, stage); + + return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes); } }; - auto lazy_stream = std::make_shared("LazyShardWithLocalReplica", header, lazily_create_stream); - auto source = std::make_shared(std::move(lazy_stream), force_add_agg_info); - - if (add_totals_port) - source->addTotalsPort(); - if (add_extremes_port) - source->addExtremesPort(); - - res.emplace_back(std::move(source)); + res.emplace_back(createDelayedPipe(header, lazily_create_stream)); } else emplace_remote_stream(); diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index 8a1836a97ac..c9b106d21dd 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -26,8 +26,8 @@ public: struct Data { TableJoin & analyzed_join; - const TableWithColumnNames & left_table; - const TableWithColumnNames & right_table; + const TableWithColumnNamesAndTypes & left_table; + const TableWithColumnNamesAndTypes & right_table; const Aliases & aliases; const bool is_asof{false}; ASTPtr asof_left_key{}; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5e2f4ecadab..cbf00836103 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -101,7 +102,6 @@ namespace ErrorCodes extern const int SESSION_NOT_FOUND; extern const int SESSION_IS_LOCKED; extern const int LOGICAL_ERROR; - extern const int UNKNOWN_SCALAR; extern const int AUTHENTICATION_FAILED; extern const int NOT_IMPLEMENTED; } @@ -351,6 +351,7 @@ struct ContextShared String format_schema_path; /// Path to a directory that contains schema files used by input formats. ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers std::optional system_logs; /// Used to log queries and operations on parts + std::optional storage_s3_settings; /// Settings of S3 storage RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml @@ -821,7 +822,11 @@ const Block & Context::getScalar(const String & name) const { auto it = scalars.find(name); if (scalars.end() == it) - throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::UNKNOWN_SCALAR); + { + // This should be a logical error, but it fails the sql_fuzz test too + // often, so 'bad arguments' for now. 
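Back in the SelectStreamFactory.cpp hunk above, lazily_create_stream now returns a Pipe and the local-vs-remote choice is deferred until the pipeline actually starts, via createDelayedPipe. A toy standalone sketch of that deferred-factory idea (the class, names and delay values are assumptions for illustration, not the real interface):

#include <functional>
#include <iostream>
#include <memory>
#include <string>

struct Source { std::string description; };
using SourceFactory = std::function<std::unique_ptr<Source>()>;

// Holds a factory instead of a ready source; the real decision (local replica
// or remote connection) is taken only when the source is first needed.
class DelayedSource
{
public:
    explicit DelayedSource(SourceFactory factory_) : factory(std::move(factory_)) {}

    Source & get()
    {
        if (!source)
            source = factory();   // created only on first use
        return *source;
    }

private:
    SourceFactory factory;
    std::unique_ptr<Source> source;
};

int main()
{
    double local_delay = 40.0;       // assumed replication delays, in seconds
    double max_remote_delay = 5.0;

    DelayedSource delayed([=]
    {
        if (local_delay < max_remote_delay)
            return std::make_unique<Source>(Source{"local stream"});
        return std::make_unique<Source>(Source{"remote query executor"});
    });

    std::cout << delayed.get().description << "\n";   // "remote query executor"
    return 0;
}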
+ throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::BAD_ARGUMENTS); + } return it->second; } @@ -1764,6 +1769,11 @@ void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration LOG_ERROR(shared->log, "An error has occured while reloading storage policies, storage policies were not applied: {}", e.message()); } } + + if (shared->storage_s3_settings) + { + shared->storage_s3_settings->loadFromConfig("s3", config); + } } @@ -1782,6 +1792,18 @@ const MergeTreeSettings & Context::getMergeTreeSettings() const return *shared->merge_tree_settings; } +const StorageS3Settings & Context::getStorageS3Settings() const +{ + auto lock = getLock(); + + if (!shared->storage_s3_settings) + { + const auto & config = getConfigRef(); + shared->storage_s3_settings.emplace().loadFromConfig("s3", config); + } + + return *shared->storage_s3_settings; +} void Context::checkCanBeDropped(const String & database, const String & table, const size_t & size, const size_t & max_size_to_drop) const { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 864468c0663..1d46049fb92 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -81,6 +81,7 @@ class TextLog; class TraceLog; class MetricLog; struct MergeTreeSettings; +class StorageS3Settings; class IDatabase; class DDLWorker; class ITableFunction; @@ -531,6 +532,7 @@ public: std::shared_ptr getPartLog(const String & part_database); const MergeTreeSettings & getMergeTreeSettings() const; + const StorageS3Settings & getStorageS3Settings() const; /// Prevents DROP TABLE if its size is greater than max_size (50GB by default, max_size=0 turn off this check) void setMaxTableSizeToDrop(size_t max_size); diff --git a/src/Interpreters/DatabaseAndTableWithAlias.h b/src/Interpreters/DatabaseAndTableWithAlias.h index adb0829a54e..d4a1a582fdc 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/src/Interpreters/DatabaseAndTableWithAlias.h @@ -45,34 +45,6 @@ struct DatabaseAndTableWithAlias } }; -struct TableWithColumnNames -{ - DatabaseAndTableWithAlias table; - Names columns; - Names hidden_columns; /// Not general columns like MATERIALIZED and ALIAS. They are omitted in * and t.* results. 
- - TableWithColumnNames(const DatabaseAndTableWithAlias & table_, const Names & columns_) - : table(table_) - , columns(columns_) - { - columns_set.insert(columns.begin(), columns.end()); - } - - TableWithColumnNames(const DatabaseAndTableWithAlias table_, Names && columns_, Names && hidden_columns_) - : table(table_) - , columns(columns_) - , hidden_columns(hidden_columns_) - { - columns_set.insert(columns.begin(), columns.end()); - columns_set.insert(hidden_columns.begin(), hidden_columns.end()); - } - - bool hasColumn(const String & name) const { return columns_set.count(name); } - -private: - NameSet columns_set; -}; - struct TableWithColumnNamesAndTypes { DatabaseAndTableWithAlias table; @@ -96,21 +68,6 @@ struct TableWithColumnNamesAndTypes names.insert(col.name); } - TableWithColumnNames removeTypes() const - { - Names out_columns; - out_columns.reserve(columns.size()); - for (auto & col : columns) - out_columns.push_back(col.name); - - Names out_hidden_columns; - out_hidden_columns.reserve(hidden_columns.size()); - for (auto & col : hidden_columns) - out_hidden_columns.push_back(col.name); - - return TableWithColumnNames(table, std::move(out_columns), std::move(out_hidden_columns)); - } - private: NameSet names; }; @@ -118,7 +75,6 @@ private: std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database); std::optional getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number); -using TablesWithColumnNames = std::vector; -using TablesWithColumnNamesAndTypes = std::vector; +using TablesWithColumns = std::vector; } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4871d8d37aa..3171f84ec9c 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -25,7 +25,6 @@ namespace ErrorCodes extern const int DATABASE_NOT_EMPTY; extern const int DATABASE_ACCESS_DENIED; extern const int LOGICAL_ERROR; - extern const int NULL_POINTER_DEREFERENCE; } TemporaryTableHolder::TemporaryTableHolder(const Context & context_, @@ -385,38 +384,46 @@ void DatabaseCatalog::updateUUIDMapping(const UUID & uuid, DatabasePtr database, it->second = std::make_pair(std::move(database), std::move(table)); } +std::unique_ptr DatabaseCatalog::database_catalog; + DatabaseCatalog::DatabaseCatalog(Context * global_context_) : global_context(global_context_), log(&Poco::Logger::get("DatabaseCatalog")) { if (!global_context) - throw Exception("DatabaseCatalog is not initialized. It's a bug.", ErrorCodes::NULL_POINTER_DEREFERENCE); + throw Exception("DatabaseCatalog is not initialized. It's a bug.", ErrorCodes::LOGICAL_ERROR); } DatabaseCatalog & DatabaseCatalog::init(Context * global_context_) { - static DatabaseCatalog database_catalog(global_context_); - return database_catalog; + if (database_catalog) + { + throw Exception("Database catalog is initialized twice. This is a bug.", + ErrorCodes::LOGICAL_ERROR); + } + + database_catalog.reset(new DatabaseCatalog(global_context_)); + + return *database_catalog; } DatabaseCatalog & DatabaseCatalog::instance() { - return init(nullptr); + if (!database_catalog) + { + throw Exception("Database catalog is not initialized. This is a bug.", + ErrorCodes::LOGICAL_ERROR); + } + + return *database_catalog; } void DatabaseCatalog::shutdown() { - try + // The catalog might not be initialized yet by init(global_context). It can + // happen if some exception was thrown on first steps of startup. 
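The DatabaseCatalog hunk above replaces the function-local static with an explicitly managed unique_ptr: init() must run exactly once, instance() requires a prior init(), and shutdown() quietly does nothing when startup failed before init() ran. A standalone sketch of that lifecycle with a toy class (not the real catalog):

#include <memory>
#include <stdexcept>

class CatalogSketch
{
public:
    static CatalogSketch & init()
    {
        if (catalog)
            throw std::logic_error("Catalog is initialized twice. This is a bug.");
        catalog.reset(new CatalogSketch());
        return *catalog;
    }

    static CatalogSketch & instance()
    {
        if (!catalog)
            throw std::logic_error("Catalog is not initialized. This is a bug.");
        return *catalog;
    }

    static void shutdown()
    {
        if (catalog)            // may never have been initialized if startup failed early
            catalog->shutdownImpl();
    }

private:
    CatalogSketch() = default;
    void shutdownImpl() {}

    static std::unique_ptr<CatalogSketch> catalog;
};

std::unique_ptr<CatalogSketch> CatalogSketch::catalog;

int main()
{
    CatalogSketch::shutdown();    // no-op: init() has not run yet
    CatalogSketch::init();
    CatalogSketch::instance();    // fine after init()
    CatalogSketch::shutdown();
    return 0;
}

Compared to the previous Meyers-singleton-via-init(nullptr) trick, this makes the "not yet initialized" and "initialized twice" cases explicit logical errors instead of relying on a NULL_POINTER_DEREFERENCE exception, which matches the error-code cleanup visible in the same hunk.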
+ if (database_catalog) { - instance().shutdownImpl(); - } - catch (const Exception & e) - { - /// If catalog was not initialized yet by init(global_context), instance() throws NULL_POINTER_DEREFERENCE. - /// It can happen if some exception was thrown on first steps of startup (e.g. command line arguments parsing). - /// Ignore it. - if (e.code() == ErrorCodes::NULL_POINTER_DEREFERENCE) - return; - throw; + database_catalog->shutdownImpl(); } } @@ -724,5 +731,3 @@ DDLGuard::~DDLGuard() } } - - diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index aefed0f372d..540568927cc 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -169,6 +169,11 @@ public: void enqueueDroppedTableCleanup(StorageID table_id, StoragePtr table, String dropped_metadata_path, bool ignore_delay = false); private: + // The global instance of database catalog. unique_ptr is to allow + // deferred initialization. Thought I'd use std::optional, but I can't + // make emplace(global_context_) compile with private constructor ¯\_(ツ)_/¯. + static std::unique_ptr database_catalog; + DatabaseCatalog(Context * global_context_); void assertDatabaseExistsUnlocked(const String & database_name) const; void assertDatabaseDoesntExistUnlocked(const String & database_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b4988f00699..3010dfcfe12 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -726,7 +726,8 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, return true; } -bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, + ManyExpressionActions & group_by_elements_actions) { const auto * select_query = getAggregatingQuery(); @@ -743,6 +744,20 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain getRootActions(ast, only_types, step.actions); } + if (optimize_aggregation_in_order) + { + auto all_columns = sourceWithJoinedColumns(); + for (auto & child : asts) + { + group_by_elements_actions.emplace_back(std::make_shared(all_columns, context)); + getRootActions(child, only_types, group_by_elements_actions.back()); + } +// std::cerr << "group_by_elements_actions\n"; +// for (const auto & elem : group_by_elements_actions) { +// std::cerr << elem->dumpActions() << "\n"; +// } + } + return true; } @@ -834,8 +849,11 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain order_by_elements_actions.emplace_back(std::make_shared(all_columns, context)); getRootActions(child, only_types, order_by_elements_actions.back()); } +// std::cerr << "order_by_elements_actions\n"; +// for (const auto & elem : order_by_elements_actions) { +// std::cerr << elem->dumpActions() << "\n"; +// } } - return true; } @@ -1115,7 +1133,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (need_aggregate) { - query_analyzer.appendGroupBy(chain, only_types || !first_stage); + /// TODO correct conditions + optimize_aggregation_in_order = + context.getSettingsRef().optimize_aggregation_in_order + && storage && query.groupBy(); + + query_analyzer.appendGroupBy(chain, only_types || !first_stage, optimize_aggregation_in_order, group_by_elements_actions); query_analyzer.appendAggregateFunctionsArguments(chain, 
only_types || !first_stage); before_aggregation = chain.getLastActions(); @@ -1128,13 +1151,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( } } - bool has_stream_with_non_joned_rows = (before_join && before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows()); + bool has_stream_with_non_joined_rows = (before_join && before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows()); optimize_read_in_order = settings.optimize_read_in_order && storage && query.orderBy() && !query_analyzer.hasAggregation() && !query.final() - && !has_stream_with_non_joned_rows; + && !has_stream_with_non_joined_rows; /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers. query_analyzer.appendSelect(chain, only_types || (need_aggregate ? !second_stage : !first_stage)); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 1afb289430e..ed07ab3fe36 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -174,6 +174,7 @@ struct ExpressionAnalysisResult bool remove_where_filter = false; bool optimize_read_in_order = false; + bool optimize_aggregation_in_order = false; ExpressionActionsPtr before_join; /// including JOIN ExpressionActionsPtr before_where; @@ -195,6 +196,7 @@ struct ExpressionAnalysisResult ConstantFilterDescription where_constant_filter_description; /// Actions by every element of ORDER BY ManyExpressionActions order_by_elements_actions; + ManyExpressionActions group_by_elements_actions; ExpressionAnalysisResult() = default; @@ -303,7 +305,7 @@ private: /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); bool appendWhere(ExpressionActionsChain & chain, bool only_types); - bool appendGroupBy(ExpressionActionsChain & chain, bool only_types); + bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); /// After aggregation: diff --git a/src/Interpreters/ExtractExpressionInfoVisitor.cpp b/src/Interpreters/ExtractExpressionInfoVisitor.cpp index f0ca33b6b8b..5f7754d315a 100644 --- a/src/Interpreters/ExtractExpressionInfoVisitor.cpp +++ b/src/Interpreters/ExtractExpressionInfoVisitor.cpp @@ -38,10 +38,10 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr { for (size_t index = 0; index < data.tables.size(); ++index) { - const auto & columns = data.tables[index].columns; + const auto & table = data.tables[index]; // TODO: make sure no collision ever happens - if (std::find(columns.begin(), columns.end(), identifier.name) != columns.end()) + if (table.hasColumn(identifier.name)) { data.unique_reference_tables_pos.emplace(index); break; diff --git a/src/Interpreters/ExtractExpressionInfoVisitor.h b/src/Interpreters/ExtractExpressionInfoVisitor.h index 65d23057e52..a412704edcc 100644 --- a/src/Interpreters/ExtractExpressionInfoVisitor.h +++ b/src/Interpreters/ExtractExpressionInfoVisitor.h @@ -16,7 +16,7 @@ struct ExpressionInfoMatcher struct Data { const Context & context; - const std::vector & tables; + const TablesWithColumns & tables; bool is_array_join = false; bool is_stateful_function = false; diff --git a/src/Interpreters/HashJoin.cpp 
b/src/Interpreters/HashJoin.cpp index 9e2ad664765..d18649c4c17 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -107,7 +107,7 @@ static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, { if (nullable) { - JoinCommon::convertColumnToNullable(column); + JoinCommon::convertColumnToNullable(column, true); if (column.type->isNullable() && !negative_null_map.empty()) { MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 26bb8e6261d..8f254b50400 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -125,12 +125,6 @@ std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & iden return tryChooseTable(identifier, tables, ambiguous); } -std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, - bool ambiguous) -{ - return tryChooseTable(identifier, tables, ambiguous); -} - std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, bool ambiguous) { @@ -196,19 +190,14 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const return ColumnMatch::NoMatch; } -IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, - const TableWithColumnNames & db_and_table) -{ - /// TODO: ColumnName match logic is disabled cause caller's code is not ready for it - return canReferColumnToTable(identifier, db_and_table.table); -} - IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & db_and_table) { ColumnMatch match = canReferColumnToTable(identifier, db_and_table.table); +#if 0 if (match == ColumnMatch::NoMatch && identifier.isShort() && db_and_table.hasColumn(identifier.shortName())) match = ColumnMatch::ColumnName; +#endif return match; } diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 81019f65b1f..7e84e10a26f 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -41,7 +41,6 @@ struct IdentifierSemantic static std::optional extractNestedName(const ASTIdentifier & identifier, const String & table_name); static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNames & db_and_table); static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & db_and_table); static void setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); @@ -53,8 +52,6 @@ struct IdentifierSemantic static std::optional getMembership(const ASTIdentifier & identifier); static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, bool allow_ambiguous = false); - static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, - bool allow_ambiguous = false); static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, bool allow_ambiguous = false); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index b238c48c002..8a7b9a245e4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ 
b/src/Interpreters/InterpreterCreateQuery.cpp @@ -690,6 +690,10 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// Then background task is created by "startup" method. And when destructor of a table object is called, background task is still active, /// and the task will use references to freed data. + /// Also note that "startup" method is exception-safe. If exception is thrown from "startup", + /// we can safely destroy the object without a call to "shutdown", because there is guarantee + /// that no background threads/similar resources remain after exception from "startup". + res->startup(); return true; } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index c4a8e3041ac..7deed262eda 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -209,10 +209,7 @@ BlockIO InterpreterInsertQuery::execute() if (table->supportsParallelInsert() && settings.max_insert_threads > 1) out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams()); - if (out_streams_size == 1) - res.pipeline.addPipe({std::make_shared(res.pipeline.getHeader(), res.pipeline.getNumStreams())}); - else - res.pipeline.resize(out_streams_size); + res.pipeline.resize(out_streams_size); } else if (query.watch) { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 64a58e33231..f9072e6176a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -71,6 +71,8 @@ #include #include #include +#include +#include namespace DB @@ -601,6 +603,20 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co return order_descr; } +static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query) +{ + SortDescription order_descr; + order_descr.reserve(query.groupBy()->children.size()); + + for (const auto & elem : query.groupBy()->children) + { + String name = elem->getColumnName(); + order_descr.emplace_back(name, 1, 1); + } + + return order_descr; +} + static UInt64 getLimitUIntValue(const ASTPtr & node, const Context & context, const std::string & expr) { const auto & [field, type] = evaluateConstantExpression(node, context); @@ -648,8 +664,8 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn { /** Streams of data. When the query is executed in parallel, we have several data streams. * If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then - * if there is an ORDER BY, then glue the streams using UnionBlockInputStream, and then MergeSortingBlockInputStream, - * if not, then glue it using UnionBlockInputStream, + * if there is an ORDER BY, then glue the streams using ResizeProcessor, and then MergeSorting transforms, + * if not, then glue it using ResizeProcessor, * then apply LIMIT. 
* If there is GROUP BY, then we will perform all operations up to GROUP BY, inclusive, in parallel; * a parallel GROUP BY will glue streams into one, @@ -739,7 +755,7 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn if (!expressions.second_stage && !expressions.need_aggregate && !expressions.hasHaving()) { if (expressions.has_order_by) - executeOrder(pipeline, query_info.input_sorting_info); + executeOrder(pipeline, query_info.input_order_info); if (expressions.has_order_by && query.limitLength()) executeDistinct(pipeline, false, expressions.selected_columns); @@ -832,7 +848,11 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn executeWhere(pipeline, expressions.before_where, expressions.remove_where_filter); if (expressions.need_aggregate) - executeAggregation(pipeline, expressions.before_aggregation, aggregate_overflow_row, aggregate_final); + { + executeAggregation(pipeline, expressions.before_aggregation, aggregate_overflow_row, aggregate_final, query_info.input_order_info); + /// We need to reset input order info, so that executeOrder can't use it + query_info.input_order_info.reset(); + } else { executeExpression(pipeline, expressions.before_order_and_select); @@ -898,7 +918,7 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final)) executeMergeSorted(pipeline); else /// Otherwise, just sort. - executeOrder(pipeline, query_info.input_sorting_info); + executeOrder(pipeline, query_info.input_order_info); } /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, @@ -958,28 +978,16 @@ void InterpreterSelectQuery::executeFetchColumns( const Settings & settings = context->getSettingsRef(); /// Optimization for trivial query like SELECT count() FROM table. 
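
A brief aside on the optimization being restructured below: a bare SELECT count() FROM t with no WHERE/PREWHERE, GROUP BY, HAVING, SAMPLE or FINAL can be answered from table metadata via storage->totalRows() instead of reading any column. A toy sketch of the idea, with an illustrative interface rather than the real storage one:

#include <cstdint>
#include <optional>

struct ToyStorage
{
    /// Filled only by engines that track an exact row count.
    std::optional<uint64_t> exact_row_count;
};

std::optional<uint64_t> tryTrivialCount(const ToyStorage & storage, bool query_filters_or_groups_rows)
{
    if (query_filters_or_groups_rows)
        return std::nullopt;            /// must fall back to a real read
    return storage.exact_row_count;     /// may still be empty -> use the normal pipeline
}
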
- auto check_trivial_count_query = [&]() -> std::optional + bool optimize_trivial_count = + syntax_analyzer_result->optimize_trivial_count && storage && + processing_stage == QueryProcessingStage::FetchColumns && + query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) && + typeid_cast(query_analyzer->aggregates()[0].function.get()); + + if (optimize_trivial_count) { - if (!settings.optimize_trivial_count_query || !syntax_analyzer_result->maybe_optimize_trivial_count || !storage - || query.sampleSize() || query.sampleOffset() || query.final() || query.prewhere() || query.where() || query.groupBy() - || !query_analyzer->hasAggregation() || processing_stage != QueryProcessingStage::FetchColumns) - return {}; - - const AggregateDescriptions & aggregates = query_analyzer->aggregates(); - - if (aggregates.size() != 1) - return {}; - - const AggregateDescription & desc = aggregates[0]; - if (typeid_cast(desc.function.get())) - return desc; - - return {}; - }; - - if (auto desc = check_trivial_count_query()) - { - auto func = desc->function; + const auto & desc = query_analyzer->aggregates()[0]; + const auto & func = desc.function; std::optional num_rows = storage->totalRows(); if (num_rows) { @@ -998,13 +1006,13 @@ void InterpreterSelectQuery::executeFetchColumns( column->insertFrom(place); auto header = analysis_result.before_aggregation->getSampleBlock(); - size_t arguments_size = desc->argument_names.size(); + size_t arguments_size = desc.argument_names.size(); DataTypes argument_types(arguments_size); for (size_t j = 0; j < arguments_size; ++j) - argument_types[j] = header.getByName(desc->argument_names[j]).type; + argument_types[j] = header.getByName(desc.argument_names[j]).type; Block block_with_count{ - {std::move(column), std::make_shared(func, argument_types, desc->parameters), desc->column_name}}; + {std::move(column), std::make_shared(func, argument_types, desc.parameters), desc.column_name}}; auto istream = std::make_shared(block_with_count); pipeline.init(Pipe(std::make_shared(istream))); @@ -1274,15 +1282,21 @@ void InterpreterSelectQuery::executeFetchColumns( query_info.prewhere_info = prewhere_info; /// Create optimizer with prepared actions. - /// Maybe we will need to calc input_sorting_info later, e.g. while reading from StorageMerge. - if (analysis_result.optimize_read_in_order) + /// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge. 
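
The hunks that follow feed GROUP BY keys into the same read-in-order machinery that ORDER BY already uses and add an in-order path to executeAggregation. The point of aggregating rows that arrive sorted by the grouping key is that each group can be finalized as soon as the key changes, with no hash table over all groups. A minimal standalone illustration of that idea (toy types, single string key, sum only; the real transform handles arbitrary keys and aggregate functions):

#include <functional>
#include <string>
#include <vector>

struct Row { std::string key; double value; };

/// Assumes `rows` is already sorted by `key`, which is what the InputOrderInfo
/// machinery guarantees for the real pipeline.
void aggregateInOrder(const std::vector<Row> & rows,
                      const std::function<void(const std::string &, double)> & emit)
{
    if (rows.empty())
        return;

    std::string current_key = rows.front().key;
    double sum = 0;

    for (const auto & row : rows)
    {
        if (row.key != current_key)
        {
            emit(current_key, sum);   /// key changed: the previous group is complete
            current_key = row.key;
            sum = 0;
        }
        sum += row.value;
    }
    emit(current_key, sum);           /// flush the last group
}

With several input streams the real code additionally merges the per-stream, per-key results before finalizing, which is what the multi-stream branch in executeAggregation below sets up.
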
+ if (analysis_result.optimize_read_in_order || analysis_result.optimize_aggregation_in_order) { - query_info.order_by_optimizer = std::make_shared( - analysis_result.order_by_elements_actions, - getSortDescription(query, *context), - query_info.syntax_analyzer_result); + if (analysis_result.optimize_read_in_order) + query_info.order_optimizer = std::make_shared( + analysis_result.order_by_elements_actions, + getSortDescription(query, *context), + query_info.syntax_analyzer_result); + else + query_info.order_optimizer = std::make_shared( + analysis_result.group_by_elements_actions, + getSortDescriptionFromGroupBy(query), + query_info.syntax_analyzer_result); - query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); } Pipes pipes = storage->read(required_columns, query_info, *context, processing_stage, max_block_size, max_streams); @@ -1388,7 +1402,7 @@ void InterpreterSelectQuery::executeWhere(QueryPipeline & pipeline, const Expres } -void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final) +void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) { pipeline.addSimpleTransform([&](const Block & header) { @@ -1426,6 +1440,62 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const /// Forget about current totals and extremes. They will be calculated again after aggregation if needed. pipeline.dropTotalsAndExtremes(); + if (group_by_info && settings.optimize_aggregation_in_order) + { + auto & query = getSelectQuery(); + SortDescription group_by_descr = getSortDescriptionFromGroupBy(query); + bool need_finish_sorting = (group_by_info->order_key_prefix_descr.size() < group_by_descr.size()); + + if (need_finish_sorting) + { + /// TOO SLOW + } + else + { + if (pipeline.getNumStreams() > 1) + { + auto many_data = std::make_shared(pipeline.getNumStreams()); + size_t counter = 0; + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, transform_params, group_by_descr, settings.max_block_size, many_data, counter++); + }); + + for (auto & column_description : group_by_descr) + { + if (!column_description.column_name.empty()) + { + column_description.column_number = pipeline.getHeader().getPositionByName(column_description.column_name); + column_description.column_name.clear(); + } + } + + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + group_by_descr, + settings.max_block_size); + + pipeline.addPipe({ std::move(transform) }); + } + else + { + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, transform_params, group_by_descr, settings.max_block_size); + }); + } + + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, transform_params); + }); + + pipeline.enableQuotaForCurrentStreams(); + return; + } + } + /// If there are several sources, then we perform parallel aggregation if (pipeline.getNumStreams() > 1) { @@ -1588,7 +1658,47 @@ void InterpreterSelectQuery::executeExpression(QueryPipeline & pipeline, const E } -void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSortingInfoPtr input_sorting_info) +void InterpreterSelectQuery::executeOrderOptimized(QueryPipeline & pipeline, 
InputOrderInfoPtr input_sorting_info, UInt64 limit, SortDescription & output_order_descr) +{ + const Settings & settings = context->getSettingsRef(); + + bool need_finish_sorting = (input_sorting_info->order_key_prefix_descr.size() < output_order_descr.size()); + if (pipeline.getNumStreams() > 1) + { + UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit); + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + input_sorting_info->order_key_prefix_descr, + settings.max_block_size, limit_for_merging); + + pipeline.addPipe({ std::move(transform) }); + } + + pipeline.enableQuotaForCurrentStreams(); + + if (need_finish_sorting) + { + pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipeline::StreamType::Main) + return nullptr; + + return std::make_shared(header, output_order_descr, limit); + }); + + /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform + + pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr + { + return std::make_shared( + header, input_sorting_info->order_key_prefix_descr, + output_order_descr, settings.max_block_size, limit); + }); + } +} + +void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputOrderInfoPtr input_sorting_info) { auto & query = getSelectQuery(); SortDescription output_order_descr = getSortDescription(query, *context); @@ -1608,43 +1718,7 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting * and then merge them into one sorted stream. * At this stage we merge per-thread streams into one. */ - - bool need_finish_sorting = (input_sorting_info->order_key_prefix_descr.size() < output_order_descr.size()); - - if (pipeline.getNumStreams() > 1) - { - UInt64 limit_for_merging = (need_finish_sorting ? 
0 : limit); - auto transform = std::make_shared( - pipeline.getHeader(), - pipeline.getNumStreams(), - input_sorting_info->order_key_prefix_descr, - settings.max_block_size, limit_for_merging); - - pipeline.addPipe({ std::move(transform) }); - } - - pipeline.enableQuotaForCurrentStreams(); - - if (need_finish_sorting) - { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr - { - if (stream_type != QueryPipeline::StreamType::Main) - return nullptr; - - return std::make_shared(header, output_order_descr, limit); - }); - - /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform - - pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr - { - return std::make_shared( - header, input_sorting_info->order_key_prefix_descr, - output_order_descr, settings.max_block_size, limit); - }); - } - + executeOrderOptimized(pipeline, input_sorting_info, limit, output_order_descr); return; } @@ -1917,8 +1991,8 @@ void InterpreterSelectQuery::executeExtremes(QueryPipeline & pipeline) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, const SubqueriesForSets & subqueries_for_sets) { - if (query_info.input_sorting_info) - executeMergeSorted(pipeline, query_info.input_sorting_info->order_key_prefix_descr, 0); + if (query_info.input_order_info) + executeMergeSorted(pipeline, query_info.input_order_info->order_key_prefix_descr, 0); const Settings & settings = context->getSettingsRef(); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index ca7fb4c72ba..34d255e398e 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -113,12 +113,13 @@ private: const Names & columns_to_remove_after_prewhere); void executeWhere(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter); - void executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final); + void executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); void executeMergeAggregated(QueryPipeline & pipeline, bool overflow_row, bool final); void executeTotalsAndHaving(QueryPipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final); void executeHaving(QueryPipeline & pipeline, const ExpressionActionsPtr & expression); static void executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression); - void executeOrder(QueryPipeline & pipeline, InputSortingInfoPtr sorting_info); + void executeOrder(QueryPipeline & pipeline, InputOrderInfoPtr sorting_info); + void executeOrderOptimized(QueryPipeline & pipeline, InputOrderInfoPtr sorting_info, UInt64 limit, SortDescription & output_order_descr); void executeWithFill(QueryPipeline & pipeline); void executeMergeSorted(QueryPipeline & pipeline); void executePreLimit(QueryPipeline & pipeline, bool do_not_skip_offset); diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index eddfcd0a633..9ebdb155643 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -204,7 +204,6 @@ BlockIO InterpreterSystemQuery::execute() case Type::DROP_DNS_CACHE: context.checkAccess(AccessType::SYSTEM_DROP_DNS_CACHE); DNSResolver::instance().dropCache(); - 
AllowedClientHosts::dropDNSCaches(); /// Reinitialize clusters to update their resolved_addresses system_context.reloadClusterConfig(); break; diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index b5d2c632135..fea0228e3fe 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -18,14 +18,17 @@ namespace ErrorCodes } PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( - const Context & context_, const TablesWithColumnNames & tables_with_columns_, const Settings & settings_) - : context(context_), tables_with_columns(tables_with_columns_), settings(settings_) + const Context & context_, const TablesWithColumns & tables_with_columns_, const Settings & settings) + : enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression) + , enable_optimize_predicate_expression_to_final_subquery(settings.enable_optimize_predicate_expression_to_final_subquery) + , context(context_) + , tables_with_columns(tables_with_columns_) { } bool PredicateExpressionsOptimizer::optimize(ASTSelectQuery & select_query) { - if (!settings.enable_optimize_predicate_expression) + if (!enable_optimize_predicate_expression) return false; if (select_query.having() && (!select_query.group_by_with_cube && !select_query.group_by_with_rollup && !select_query.group_by_with_totals)) @@ -133,7 +136,7 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e break; /// Skip left and right table optimization is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos], - tables_with_columns[table_pos].columns); + tables_with_columns[table_pos].columns.getNames()); if (table_element->table_join && isRight(table_element->table_join->as()->kind)) break; /// Skip left table optimization @@ -143,12 +146,12 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e return is_rewrite_tables; } -bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, const Names & table_column) const +bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, Names && table_columns) const { if (!table_predicates.empty()) { - auto optimize_final = settings.enable_optimize_predicate_expression_to_final_subquery; - PredicateRewriteVisitor::Data data(context, table_predicates, table_column, optimize_final); + auto optimize_final = enable_optimize_predicate_expression_to_final_subquery; + PredicateRewriteVisitor::Data data(context, table_predicates, std::move(table_columns), optimize_final); PredicateRewriteVisitor(data).visit(table_element); return data.is_rewrite; diff --git a/src/Interpreters/PredicateExpressionsOptimizer.h b/src/Interpreters/PredicateExpressionsOptimizer.h index da6b98987a6..f555c68020e 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/src/Interpreters/PredicateExpressionsOptimizer.h @@ -18,34 +18,21 @@ struct Settings; class PredicateExpressionsOptimizer { public: - PredicateExpressionsOptimizer(const Context & context_, const TablesWithColumnNames & tables_with_columns_, const Settings & settings_); + PredicateExpressionsOptimizer(const Context & context_, const TablesWithColumns & tables_with_columns_, const Settings & settings_); bool optimize(ASTSelectQuery & select_query); private: - /// Extracts settings, mostly to show which are used and 
which are not. - struct ExtractedSettings - { - const bool enable_optimize_predicate_expression; - const bool enable_optimize_predicate_expression_to_final_subquery; - - template - ExtractedSettings(const T & settings_) - : enable_optimize_predicate_expression(settings_.enable_optimize_predicate_expression), - enable_optimize_predicate_expression_to_final_subquery(settings_.enable_optimize_predicate_expression_to_final_subquery) - {} - }; - + const bool enable_optimize_predicate_expression; + const bool enable_optimize_predicate_expression_to_final_subquery; const Context & context; - const std::vector & tables_with_columns; - - const ExtractedSettings settings; + const TablesWithColumns & tables_with_columns; std::vector extractTablesPredicates(const ASTPtr & where, const ASTPtr & prewhere); bool tryRewritePredicatesToTables(ASTs & tables_element, const std::vector & tables_predicates); - bool tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, const Names & table_column) const; + bool tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, Names && table_columns) const; bool tryMovePredicatesFromHavingToWhere(ASTSelectQuery & select_query); }; diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp index a834e68172b..7fc45044a88 100644 --- a/src/Interpreters/PredicateRewriteVisitor.cpp +++ b/src/Interpreters/PredicateRewriteVisitor.cpp @@ -17,7 +17,7 @@ namespace DB { PredicateRewriteVisitorData::PredicateRewriteVisitorData( - const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_) + const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_) : context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_) { } diff --git a/src/Interpreters/PredicateRewriteVisitor.h b/src/Interpreters/PredicateRewriteVisitor.h index cc1b6472a4c..fa25381f4b9 100644 --- a/src/Interpreters/PredicateRewriteVisitor.h +++ b/src/Interpreters/PredicateRewriteVisitor.h @@ -24,12 +24,12 @@ public: return true; } - PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_); + PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_); private: const Context & context; const ASTs & predicates; - const Names & column_names; + const Names column_names; bool optimize_final; void visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &); diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index e10f8bb2ea7..879a0bcf88e 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -104,9 +104,7 @@ const RowRef * AsofRowRefs::findAsof(TypeIndex type, ASOF::Inequality inequality std::optional AsofRowRefs::getTypeSize(const IColumn * asof_column, size_t & size) { - TypeIndex idx = columnVectorDataType(asof_column); - if (idx == TypeIndex::Nothing) - idx = columnDecimalDataType(asof_column); + TypeIndex idx = asof_column->getDataType(); switch (idx) { diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index 831379090ad..8f6d368e6ad 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -102,7 +102,7 @@ using CustomizeGlobalNotInVisitor = InDepthNodeVisitor & tables_with_columns) + const TablesWithColumns & tables_with_columns) { LogAST log; 
TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns); @@ -528,7 +528,7 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul /// Find the columns that are obtained by JOIN. void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & select_query, - const std::vector & tables, const Aliases & aliases) + const TablesWithColumns & tables, const Aliases & aliases) { const ASTTablesInSelectQueryElement * node = select_query.join(); if (!node) @@ -598,7 +598,7 @@ void SyntaxAnalyzerResult::collectSourceColumns(bool add_special) /// Calculate which columns are required to execute the expression. /// Then, delete all other columns from the list of available columns. /// After execution, columns will only contain the list of columns needed to read from the table. -void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query) +void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, bool is_select) { /// We calculate required_source_columns with source_columns modifications and swap them on exit required_source_columns = source_columns; @@ -648,12 +648,11 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query) required.insert(column_name_type.name); } - const auto * select_query = query->as(); - /// You need to read at least one column to find the number of rows. - if (select_query && required.empty()) + if (is_select && required.empty()) { - maybe_optimize_trivial_count = true; + optimize_trivial_count = true; + /// We will find a column with minimum . /// Because it is the column that is cheapest to read. struct ColumnSizeTuple @@ -662,12 +661,14 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query) size_t type_size; size_t uncompressed_size; String name; + bool operator<(const ColumnSizeTuple & that) const { return std::tie(compressed_size, type_size, uncompressed_size) < std::tie(that.compressed_size, that.type_size, that.uncompressed_size); } }; + std::vector columns; if (storage) { @@ -681,6 +682,7 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query) columns.emplace_back(ColumnSizeTuple{c->second.data_compressed, type_size, c->second.data_uncompressed, source_column.name}); } } + if (!columns.empty()) required.insert(std::min_element(columns.begin(), columns.end())->name); else @@ -760,6 +762,7 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query) required_source_columns.swap(source_columns); } + SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( ASTPtr & query, SyntaxAnalyzerResult && result, @@ -790,12 +793,6 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( if (remove_duplicates) renameDuplicatedColumns(select_query); - /// TODO: Remove unneeded conversion - std::vector tables_with_column_names; - tables_with_column_names.reserve(tables_with_columns.size()); - for (const auto & table : tables_with_columns) - tables_with_column_names.emplace_back(table.removeTypes()); - if (tables_with_columns.size() > 1) { result.analyzed_join->columns_from_joined_table = tables_with_columns[1].columns; @@ -803,7 +800,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( source_columns_set, tables_with_columns[1].table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_column_names); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); /// Optimizes logical expressions. 
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -822,10 +819,11 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( { optimizeIf(query, result.aliases, settings.optimize_if_chain_to_miltiif); - optimizeArithmeticOperationsInAgr(query, settings.optimize_arithmetic_operations_in_agr_func); + /// Move arithmetic operations out of aggregation functions + optimizeArithmeticOperationsInAgr(query, settings.optimize_arithmetic_operations_in_aggregate_functions); /// Push the predicate expression down to the subqueries. - result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_column_names, settings).optimize(*select_query); + result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query); /// GROUP BY injective function elimination. optimizeGroupBy(select_query, source_columns_set, context); @@ -844,11 +842,18 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); - collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_column_names, result.aliases); + collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); } result.aggregates = getAggregates(query, *select_query); - result.collectUsedColumns(query); + result.collectUsedColumns(query, true); + + if (result.optimize_trivial_count) + result.optimize_trivial_count = settings.optimize_trivial_count_query && + !select_query->where() && !select_query->prewhere() && !select_query->groupBy() && !select_query->having() && + !select_query->sampleSize() && !select_query->sampleOffset() && !select_query->final() && + (tables_with_columns.size() < 2 || isLeft(result.analyzed_join->kind())); + return std::make_shared(result); } @@ -882,7 +887,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTy else assertNoAggregates(query, "in wrong place"); - result.collectUsedColumns(query); + result.collectUsedColumns(query, false); return std::make_shared(result); } diff --git a/src/Interpreters/SyntaxAnalyzer.h b/src/Interpreters/SyntaxAnalyzer.h index abacb25ac4d..175c2db295a 100644 --- a/src/Interpreters/SyntaxAnalyzer.h +++ b/src/Interpreters/SyntaxAnalyzer.h @@ -46,11 +46,11 @@ struct SyntaxAnalyzerResult /// Predicate optimizer overrides the sub queries bool rewrite_subqueries = false; + bool optimize_trivial_count = false; + /// Results of scalar sub queries Scalars scalars; - bool maybe_optimize_trivial_count = false; - SyntaxAnalyzerResult(const NamesAndTypesList & source_columns_, ConstStoragePtr storage_ = {}, bool add_special = true) : storage(storage_) , source_columns(source_columns_) @@ -59,7 +59,7 @@ struct SyntaxAnalyzerResult } void collectSourceColumns(bool add_special); - void collectUsedColumns(const ASTPtr & query); + void collectUsedColumns(const ASTPtr & query, bool is_select); Names requiredSourceColumns() const { return required_source_columns.getNames(); } const Scalars & getScalars() const { return scalars; } }; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index bde64919bc6..04265734ce7 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #if defined(OS_LINUX) # include @@ -134,6 +135,54 @@ void 
ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool setupState(thread_group_); } +void ThreadStatus::initPerformanceCounters() +{ + performance_counters_finalized = false; + + /// Clear stats from previous query if a new query is started + /// TODO: make separate query_thread_performance_counters and thread_performance_counters + performance_counters.resetCounters(); + memory_tracker.resetCounters(); + memory_tracker.setDescription("(for thread)"); + + query_start_time_nanoseconds = getCurrentTimeNanoseconds(); + query_start_time = time(nullptr); + ++queries_started; + + *last_rusage = RUsageCounters::current(query_start_time_nanoseconds); + + if (query_context) + { + const Settings & settings = query_context->getSettingsRef(); + if (settings.metrics_perf_events_enabled) + { + try + { + current_thread_counters.initializeProfileEvents( + settings.metrics_perf_events_list); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + } + + if (!taskstats) + { + try + { + taskstats = TasksStatsCounters::create(thread_id); + } + catch (...) + { + tryLogCurrentException(log); + } + } + if (taskstats) + taskstats->reset(); +} + void ThreadStatus::finalizePerformanceCounters() { if (performance_counters_finalized) @@ -142,6 +191,21 @@ void ThreadStatus::finalizePerformanceCounters() performance_counters_finalized = true; updatePerformanceCounters(); + bool close_perf_descriptors = true; + if (query_context) + close_perf_descriptors = !query_context->getSettingsRef().metrics_perf_events_enabled; + + try + { + current_thread_counters.finalizeProfileEvents(performance_counters); + if (close_perf_descriptors) + current_thread_counters.closeEventDescriptors(); + } + catch (...) + { + tryLogCurrentException(log); + } + try { if (global_context && query_context) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 363e2e2ba64..908eb2fd57c 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -37,9 +37,10 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const auto nested2 = IdentifierSemantic::extractNestedName(identifier, table.alias); const String & short_name = identifier.shortName(); - const Names & column_names = tables[table_pos].columns; - for (const auto & known_name : column_names) + const auto & columns = tables[table_pos].columns; + for (const auto & column : columns) { + const String & known_name = column.name; if (short_name == known_name) return false; if (nested1 && *nested1 == known_name) @@ -48,9 +49,10 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const return false; } - const Names & hidden_names = tables[table_pos].hidden_columns; - for (const auto & known_name : hidden_names) + const auto & hidden_columns = tables[table_pos].hidden_columns; + for (const auto & column : hidden_columns) { + const String & known_name = column.name; if (short_name == known_name) return false; if (nested1 && *nested1 == known_name) @@ -59,7 +61,7 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const return false; } - return !column_names.empty(); + return !columns.empty(); } bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) @@ -232,11 +234,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt bool first_table = true; for (const auto & table : tables_with_columns) 
{ - for (const auto & column_name : table.columns) + for (const auto & column : table.columns) { - if (first_table || !data.join_using_columns.count(column_name)) + if (first_table || !data.join_using_columns.count(column.name)) { - addIdentifier(node.children, table.table, column_name, AsteriskSemantic::getAliases(*asterisk)); + addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk)); } } @@ -248,11 +250,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt bool first_table = true; for (const auto & table : tables_with_columns) { - for (const auto & column_name : table.columns) + for (const auto & column : table.columns) { - if (asterisk_pattern->isColumnMatching(column_name) && (first_table || !data.join_using_columns.count(column_name))) + if (asterisk_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name))) { - addIdentifier(node.children, table.table, column_name, AsteriskSemantic::getAliases(*asterisk_pattern)); + addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk_pattern)); } } @@ -267,9 +269,9 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { if (ident_db_and_name.satisfies(table.table, true)) { - for (const auto & column_name : table.columns) + for (const auto & column : table.columns) { - addIdentifier(node.children, table.table, column_name, AsteriskSemantic::getAliases(*qualified_asterisk)); + addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*qualified_asterisk)); } break; } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index e8c320671bf..1ed4da57a93 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -25,11 +25,11 @@ public: struct Data { const NameSet source_columns; - const std::vector & tables; + const TablesWithColumns & tables; std::unordered_set join_using_columns; bool has_columns; - Data(const NameSet & source_columns_, const std::vector & tables_, bool has_columns_ = true) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 8467a98685d..6e3fd516e1c 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -115,10 +115,9 @@ NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table return getColumnsFromTableExpression(table_expression, context, materialized, aliases, virtuals); } -std::vector getDatabaseAndTablesWithColumns(const std::vector & table_expressions, - const Context & context) +TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector & table_expressions, const Context & context) { - std::vector tables_with_columns; + TablesWithColumns tables_with_columns; if (!table_expressions.empty()) { @@ -146,15 +145,4 @@ std::vector getDatabaseAndTablesWithColumns(const return tables_with_columns; } -std::vector getDatabaseAndTablesWithColumnNames(const std::vector & table_expressions, - const Context & context) -{ - std::vector tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); - std::vector out; - out.reserve(tables_with_columns.size()); - for (auto & table : 
tables_with_columns) - out.emplace_back(table.removeTypes()); - return out; -} - } diff --git a/src/Interpreters/getTableExpressions.h b/src/Interpreters/getTableExpressions.h index 4e49a94bcd9..9254fb9d6a0 100644 --- a/src/Interpreters/getTableExpressions.h +++ b/src/Interpreters/getTableExpressions.h @@ -17,9 +17,6 @@ const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, siz ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number); NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, const Context & context); -std::vector getDatabaseAndTablesWithColumns(const std::vector & table_expressions, - const Context & context); -std::vector getDatabaseAndTablesWithColumnNames(const std::vector & table_expressions, - const Context & context); +TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector & table_expressions, const Context & context); } diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index e3ca9258892..6dd3a202d4d 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -16,8 +16,14 @@ namespace ErrorCodes namespace JoinCommon { -void convertColumnToNullable(ColumnWithTypeAndName & column) +void convertColumnToNullable(ColumnWithTypeAndName & column, bool low_card_nullability) { + if (low_card_nullability && column.type->lowCardinality()) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + if (column.type->isNullable() || !column.type->canBeInsideNullable()) return; diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index b69a0a4a993..47fa082e700 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -13,7 +13,7 @@ using ColumnRawPtrs = std::vector; namespace JoinCommon { -void convertColumnToNullable(ColumnWithTypeAndName & column); +void convertColumnToNullable(ColumnWithTypeAndName & column, bool low_card_nullability = false); void convertColumnsToNullable(Block & block, size_t starting_pos = 0); void removeColumnNullability(ColumnWithTypeAndName & column); Columns materializeColumns(const Block & block, const Names & names); diff --git a/src/Interpreters/tests/CMakeLists.txt b/src/Interpreters/tests/CMakeLists.txt index 19d302d2b30..324a38b1a17 100644 --- a/src/Interpreters/tests/CMakeLists.txt +++ b/src/Interpreters/tests/CMakeLists.txt @@ -1,15 +1,3 @@ -add_executable (expression expression.cpp) -target_link_libraries (expression PRIVATE dbms clickhouse_parsers) - -add_executable (create_query create_query.cpp) -target_link_libraries (create_query PRIVATE dbms clickhouse_parsers) - -add_executable (select_query select_query.cpp) -target_link_libraries (select_query PRIVATE clickhouse_storages_system dbms clickhouse_common_io) - -add_executable (aggregate aggregate.cpp) -target_link_libraries (aggregate PRIVATE dbms) - add_executable (hash_map hash_map.cpp) target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) target_link_libraries (hash_map PRIVATE dbms) @@ -19,8 +7,7 @@ target_include_directories (hash_map_lookup SYSTEM BEFORE PRIVATE ${SPARSEHASH_I target_link_libraries (hash_map_lookup PRIVATE dbms) add_executable (hash_map3 hash_map3.cpp) -target_include_directories(hash_map3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR}) -target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) +target_link_libraries (hash_map3 
PRIVATE dbms ${FARMHASH_LIBRARIES} metrohash) add_executable (hash_map_string hash_map_string.cpp) target_include_directories (hash_map_string SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) @@ -30,8 +17,7 @@ add_executable (hash_map_string_2 hash_map_string_2.cpp) target_link_libraries (hash_map_string_2 PRIVATE dbms) add_executable (hash_map_string_3 hash_map_string_3.cpp) -target_include_directories(hash_map_string_3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR}) -target_link_libraries (hash_map_string_3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) +target_link_libraries (hash_map_string_3 PRIVATE dbms ${FARMHASH_LIBRARIES} metrohash) add_executable (hash_map_string_small hash_map_string_small.cpp) target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) diff --git a/src/Interpreters/tests/aggregate.cpp b/src/Interpreters/tests/aggregate.cpp deleted file mode 100644 index 9959bca7aac..00000000000 --- a/src/Interpreters/tests/aggregate.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include -#include - -#include -#include - -#include -#include - -#include - -#include - -#include - - -int main(int argc, char ** argv) -{ - using namespace DB; - - try - { - size_t n = argc == 2 ? std::stol(argv[1]) : 10; - - Block block; - - { - ColumnWithTypeAndName column; - column.name = "x"; - column.type = std::make_shared(); - auto col = ColumnInt16::create(); - auto & vec_x = col->getData(); - - vec_x.resize(n); - for (size_t i = 0; i < n; ++i) - vec_x[i] = i % 9; - - column.column = std::move(col); - block.insert(column); - } - - const char * strings[] = {"abc", "def", "abcd", "defg", "ac"}; - - { - ColumnWithTypeAndName column; - column.name = "s1"; - column.type = std::make_shared(); - auto col = ColumnString::create(); - - for (size_t i = 0; i < n; ++i) - col->insert(std::string(strings[i % 5])); - - column.column = std::move(col); - block.insert(column); - } - - { - ColumnWithTypeAndName column; - column.name = "s2"; - column.type = std::make_shared(); - auto col = ColumnString::create(); - - for (size_t i = 0; i < n; ++i) - col->insert(std::string(strings[i % 3])); - - column.column = std::move(col); - block.insert(column); - } - - BlockInputStreamPtr stream = std::make_shared(block); - AggregatedDataVariants aggregated_data_variants; - - AggregateFunctionFactory factory; - - AggregateDescriptions aggregate_descriptions(1); - - DataTypes empty_list_of_types; - aggregate_descriptions[0].function = factory.get("count", empty_list_of_types); - - Aggregator::Params params( - stream->getHeader(), {0, 1}, aggregate_descriptions, - false, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, 1, 0); - - Aggregator aggregator(params); - - { - Stopwatch stopwatch; - stopwatch.start(); - - aggregator.execute(stream, aggregated_data_variants); - - stopwatch.stop(); - std::cout << std::fixed << std::setprecision(2) - << "Elapsed " << stopwatch.elapsedSeconds() << " sec." - << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." 
- << std::endl; - } - } - catch (const Exception & e) - { - std::cerr << e.displayText() << std::endl; - } - - return 0; -} diff --git a/src/Interpreters/tests/create_query.cpp b/src/Interpreters/tests/create_query.cpp deleted file mode 100644 index 82bb8db5d78..00000000000 --- a/src/Interpreters/tests/create_query.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include -#include - -#include -#include -#include - -#include - -#include -#include - - -using namespace DB; - -int main(int, char **) -try -{ - std::string input = "CREATE TABLE IF NOT EXISTS hits (\n" - "WatchID UInt64,\n" - "JavaEnable UInt8,\n" - "Title String,\n" - "EventTime DateTime,\n" - "CounterID UInt32,\n" - "ClientIP UInt32,\n" - "RegionID UInt32,\n" - "UniqID UInt64,\n" - "CounterClass UInt8,\n" - "OS UInt8,\n" - "UserAgent UInt8,\n" - "URL String,\n" - "Referer String,\n" - "ResolutionWidth UInt16,\n" - "ResolutionHeight UInt16,\n" - "ResolutionDepth UInt8,\n" - "FlashMajor UInt8,\n" - "FlashMinor UInt8,\n" - "FlashMinor2 String,\n" - "NetMajor UInt8,\n" - "NetMinor UInt8,\n" - "UserAgentMajor UInt16,\n" - "UserAgentMinor FixedString(2),\n" - "CookieEnable UInt8,\n" - "JavascriptEnable UInt8,\n" - "IsMobile UInt8,\n" - "MobilePhone UInt8,\n" - "MobilePhoneModel String,\n" - "Params String,\n" - "IPNetworkID UInt32,\n" - "TraficSourceID Int8,\n" - "SearchEngineID UInt16,\n" - "SearchPhrase String,\n" - "AdvEngineID UInt8,\n" - "IsArtifical UInt8,\n" - "WindowClientWidth UInt16,\n" - "WindowClientHeight UInt16,\n" - "ClientTimeZone Int16,\n" - "ClientEventTime DateTime,\n" - "SilverlightVersion1 UInt8,\n" - "SilverlightVersion2 UInt8,\n" - "SilverlightVersion3 UInt32,\n" - "SilverlightVersion4 UInt16,\n" - "PageCharset String,\n" - "CodeVersion UInt32,\n" - "IsLink UInt8,\n" - "IsDownload UInt8,\n" - "IsNotBounce UInt8,\n" - "FUniqID UInt64,\n" - "OriginalURL String,\n" - "HID UInt32,\n" - "IsOldCounter UInt8,\n" - "IsEvent UInt8,\n" - "IsParameter UInt8,\n" - "DontCountHits UInt8,\n" - "WithHash UInt8\n" - ") ENGINE = Log"; - - ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); - - SharedContextHolder shared_context = Context::createShared(); - Context context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - - context.setPath("./"); - auto database = std::make_shared("test", "./metadata/test/", context); - DatabaseCatalog::instance().attachDatabase("test", database); - database->loadStoredObjects(context, false); - context.setCurrentDatabase("test"); - - InterpreterCreateQuery interpreter(ast, context); - interpreter.execute(); - - return 0; -} -catch (const Exception & e) -{ - std::cerr << e.what() << ", " << e.displayText() << std::endl - << std::endl - << "Stack trace:" << std::endl - << e.getStackTraceString(); - return 1; -} diff --git a/src/Interpreters/tests/expression.cpp b/src/Interpreters/tests/expression.cpp deleted file mode 100644 index 8327514b3d3..00000000000 --- a/src/Interpreters/tests/expression.cpp +++ /dev/null @@ -1,140 +0,0 @@ -#include -#include - -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - - -int main(int argc, char ** argv) -{ - using namespace DB; - - try - { - std::string input = "SELECT x, s1, s2, " - "/*" - "2 + x * 2, x * 2, x % 3 == 1, " - "s1 == 'abc', s1 == s2, s1 != 'abc', s1 != s2, " - "s1 < 'abc', s1 < s2, s1 > 'abc', s1 > s2, " - "s1 <= 'abc', s1 <= s2, s1 >= 
'abc', s1 >= s2, " - "*/" - "s1 < s2 AND x % 3 < x % 5"; - - ParserSelectQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); - - formatAST(*ast, std::cerr); - std::cerr << std::endl; - - SharedContextHolder shared_context = Context::createShared(); - Context context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - NamesAndTypesList columns - { - {"x", std::make_shared()}, - {"s1", std::make_shared()}, - {"s2", std::make_shared()} - }; - - auto syntax_result = SyntaxAnalyzer(context).analyze(ast, columns); - SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context); - ExpressionActionsChain chain(context); - analyzer.appendSelect(chain, false); - analyzer.appendProjectResult(chain); - chain.finalize(); - ExpressionActionsPtr expression = chain.getLastActions(); - - size_t n = argc == 2 ? std::stol(argv[1]) : 10; - - Block block; - - { - ColumnWithTypeAndName column; - column.name = "x"; - column.type = std::make_shared(); - auto col = ColumnInt16::create(); - auto & vec_x = col->getData(); - - vec_x.resize(n); - for (size_t i = 0; i < n; ++i) - vec_x[i] = i % 9; - - column.column = std::move(col); - block.insert(column); - } - - const char * strings[] = {"abc", "def", "abcd", "defg", "ac"}; - - { - ColumnWithTypeAndName column; - column.name = "s1"; - column.type = std::make_shared(); - auto col = ColumnString::create(); - - for (size_t i = 0; i < n; ++i) - col->insert(std::string(strings[i % 5])); - - column.column = std::move(col); - block.insert(column); - } - - { - ColumnWithTypeAndName column; - column.name = "s2"; - column.type = std::make_shared(); - auto col = ColumnString::create(); - - for (size_t i = 0; i < n; ++i) - col->insert(std::string(strings[i % 3])); - - column.column = std::move(col); - block.insert(column); - } - - { - Stopwatch stopwatch; - stopwatch.start(); - - expression->execute(block); - - stopwatch.stop(); - std::cout << std::fixed << std::setprecision(2) - << "Elapsed " << stopwatch.elapsedSeconds() << " sec." - << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." - << std::endl; - } - - auto is = std::make_shared(block); - LimitBlockInputStream lis(is, 20, std::max(0, static_cast(n) - 20)); - WriteBufferFromOStream out_buf(std::cout); - BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out_buf, block, context); - - copyData(lis, *out); - } - catch (const Exception & e) - { - std::cerr << e.displayText() << std::endl; - } - - return 0; -} diff --git a/src/Interpreters/tests/select_query.cpp b/src/Interpreters/tests/select_query.cpp deleted file mode 100644 index fb364d28086..00000000000 --- a/src/Interpreters/tests/select_query.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include - -#include - -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - - -using namespace DB; - -int main(int, char **) -try -{ - Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); - Poco::Logger::root().setChannel(channel); - Poco::Logger::root().setLevel("trace"); - - /// Pre-initialize the `DateLUT` so that the first initialization does not affect the measured execution speed. 
- DateLUT::instance(); - - SharedContextHolder shared_context = Context::createShared(); - Context context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - - context.setPath("./"); - - loadMetadata(context); - - DatabasePtr system = std::make_shared("system", "./metadata/system/", context); - DatabaseCatalog::instance().attachDatabase("system", system); - system->loadStoredObjects(context, false); - attachSystemTablesLocal(*DatabaseCatalog::instance().getSystemDatabase()); - context.setCurrentDatabase("default"); - - ReadBufferFromFileDescriptor in(STDIN_FILENO); - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - - executeQuery(in, out, /* allow_into_outfile = */ false, context, {}); - - return 0; -} -catch (const Exception & e) -{ - std::cerr << e.what() << ", " << e.displayText() << std::endl - << std::endl - << "Stack trace:" << std::endl - << e.getStackTraceString(); - return 1; -} diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index b281315f555..7a0d14dbc69 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -12,7 +12,9 @@ ASTPtr ASTColumnDeclaration::clone() const if (type) { - res->type = type; + // Type may be an ASTFunction (e.g. `create table t (a Decimal(9,0))`), + // so we have to clone it properly as well. + res->type = type->clone(); res->children.push_back(res->type); } diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index d921ff427ae..d7a40a2eb85 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -23,7 +23,13 @@ public: String getID(char delim) const override { return "Explain" + (delim + toString(kind)); } ExplainKind getKind() const { return kind; } - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + res->children.clear(); + res->children.push_back(children[0]->clone()); + return res; + } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 6e3ab9decb7..b83fc20e818 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -2,7 +2,6 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_parsers .) 
add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io) -target_include_directories(clickhouse_parsers PUBLIC ${DBMS_INCLUDE_DIR}) if (USE_DEBUG_HELPERS) set (INCLUDE_DEBUG_HELPERS "-I${ClickHouse_SOURCE_DIR}/base -include ${ClickHouse_SOURCE_DIR}/src/Parsers/iostream_debug_helpers.h") diff --git a/src/Parsers/IParserBase.cpp b/src/Parsers/IParserBase.cpp index 0aade8e36ac..0241250926d 100644 --- a/src/Parsers/IParserBase.cpp +++ b/src/Parsers/IParserBase.cpp @@ -4,11 +4,6 @@ namespace DB { -namespace ErrorCodes -{ -} - - bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) { expected.add(pos, getName()); diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 720ca666023..70a2b339f28 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -59,11 +59,31 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::RESTART_REPLICA: case Type::SYNC_REPLICA: - case Type::FLUSH_DISTRIBUTED: if (!parseDatabaseAndTableName(pos, expected, res->database, res->table)) return false; break; + case Type::STOP_DISTRIBUTED_SENDS: + case Type::START_DISTRIBUTED_SENDS: + case Type::FLUSH_DISTRIBUTED: + { + String cluster_str; + if (ParserKeyword{"ON"}.ignore(pos, expected)) + { + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + } + res->cluster = cluster_str; + if (!parseDatabaseAndTableName(pos, expected, res->database, res->table)) + { + /// FLUSH DISTRIBUTED requires table + /// START/STOP DISTRIBUTED SENDS does not requires table + if (res->type == Type::FLUSH_DISTRIBUTED) + return false; + } + break; + } + case Type::STOP_MERGES: case Type::START_MERGES: case Type::STOP_TTL_MERGES: @@ -76,8 +96,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: - case Type::STOP_DISTRIBUTED_SENDS: - case Type::START_DISTRIBUTED_SENDS: parseDatabaseAndTableName(pos, expected, res->database, res->table); break; diff --git a/src/Processors/ConcatProcessor.cpp b/src/Processors/ConcatProcessor.cpp index 27338c7c879..f4648caf0f0 100644 --- a/src/Processors/ConcatProcessor.cpp +++ b/src/Processors/ConcatProcessor.cpp @@ -4,6 +4,11 @@ namespace DB { +ConcatProcessor::ConcatProcessor(const Block & header, size_t num_inputs) + : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin()) +{ +} + ConcatProcessor::Status ConcatProcessor::prepare() { auto & output = outputs.front(); diff --git a/src/Processors/ConcatProcessor.h b/src/Processors/ConcatProcessor.h index 4aa5099b38a..64f9712c69a 100644 --- a/src/Processors/ConcatProcessor.h +++ b/src/Processors/ConcatProcessor.h @@ -16,10 +16,7 @@ namespace DB class ConcatProcessor : public IProcessor { public: - ConcatProcessor(const Block & header, size_t num_inputs) - : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin()) - { - } + ConcatProcessor(const Block & header, size_t num_inputs); String getName() const override { return "Concat"; } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 8017667909b..364e3282f00 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -638,7 
+638,7 @@ private: } catch (const Poco::Exception & e) { - throw Exception(Exception::CreateFromPoco, e); + throw Exception(Exception::CreateFromPocoTag{}, e); } catch (const avro::Exception & e) { diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index b7c230cb6de..a9bd73d8026 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -158,11 +158,11 @@ public: static std::string statusToName(Status status); - /** Method 'prepare' is responsible for all cheap ("instantenous": O(1) of data volume, no wait) calculations. + /** Method 'prepare' is responsible for all cheap ("instantaneous": O(1) of data volume, no wait) calculations. * * It may access input and output ports, * indicate the need for work by another processor by returning NeedData or PortFull, - * or indicate the absense of work by returning Finished or Unneeded, + * or indicate the absence of work by returning Finished or Unneeded, * it may pull data from input ports and push data to output ports. * * The method is not thread-safe and must be called from a single thread in one moment of time, diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index 92c91a81b8a..5b6109440d5 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB { @@ -673,8 +674,10 @@ void QueryPipeline::initRowsBeforeLimit() { RowsBeforeLimitCounterPtr rows_before_limit_at_least; + /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. std::vector limits; std::vector sources; + std::vector remote_sources; std::unordered_set visited; @@ -705,6 +708,9 @@ void QueryPipeline::initRowsBeforeLimit() if (auto * source = typeid_cast(processor)) sources.emplace_back(source); + + if (auto * source = typeid_cast(processor)) + remote_sources.emplace_back(source); } else if (auto * sorting = typeid_cast(processor)) { @@ -735,7 +741,7 @@ void QueryPipeline::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) { rows_before_limit_at_least = std::make_shared(); @@ -744,6 +750,9 @@ void QueryPipeline::initRowsBeforeLimit() for (auto & source : sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + for (auto & source : remote_sources) + source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } /// If there is a limit, then enable rows_before_limit_at_least diff --git a/src/Processors/RowsBeforeLimitCounter.h b/src/Processors/RowsBeforeLimitCounter.h index 36ea4a557a8..f5eb40ff84a 100644 --- a/src/Processors/RowsBeforeLimitCounter.h +++ b/src/Processors/RowsBeforeLimitCounter.h @@ -15,6 +15,12 @@ public: rows_before_limit.fetch_add(rows, std::memory_order_release); } + void set(uint64_t rows) + { + setAppliedLimit(); + rows_before_limit.store(rows, std::memory_order_release); + } + uint64_t get() const { return rows_before_limit.load(std::memory_order_acquire); } void setAppliedLimit() { has_applied_limit.store(true, std::memory_order_release); } diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp new file mode 100644 index 00000000000..42a33d00196 --- /dev/null +++ b/src/Processors/Sources/DelayedSource.cpp @@ -0,0 +1,119 @@ +#include +#include "NullSource.h" + +namespace DB +{ + +DelayedSource::DelayedSource(const Block & header, Creator processors_creator) + : 
IProcessor({}, OutputPorts(3, header)) + , creator(std::move(processors_creator)) +{ +} + +IProcessor::Status DelayedSource::prepare() +{ + /// At first, wait for main input is needed and expand pipeline. + if (inputs.empty()) + { + auto & first_output = outputs.front(); + + /// If main port was finished before callback was called, stop execution. + if (first_output.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + if (!first_output.isNeeded()) + return Status::PortFull; + + /// Call creator callback to get processors. + if (processors.empty()) + return Status::Ready; + + return Status::ExpandPipeline; + } + + /// Process ports in order: main, totals, extremes + auto output = outputs.begin(); + for (auto input = inputs.begin(); input != inputs.end(); ++input, ++output) + { + if (output->isFinished()) + { + input->close(); + continue; + } + + if (!output->isNeeded()) + return Status::PortFull; + + if (input->isFinished()) + { + output->finish(); + continue; + } + + input->setNeeded(); + if (!input->hasData()) + return Status::PortFull; + + output->pushData(input->pullData(true)); + return Status::PortFull; + } + + return Status::Finished; +} + +void DelayedSource::work() +{ + auto pipe = creator(); + + main_output = &pipe.getPort(); + totals_output = pipe.getTotalsPort(); + extremes_output = pipe.getExtremesPort(); + + processors = std::move(pipe).detachProcessors(); + + if (!totals_output) + { + processors.emplace_back(std::make_shared(main_output->getHeader())); + totals_output = &processors.back()->getOutputs().back(); + } + + if (!extremes_output) + { + processors.emplace_back(std::make_shared(main_output->getHeader())); + extremes_output = &processors.back()->getOutputs().back(); + } +} + +Processors DelayedSource::expandPipeline() +{ + /// Add new inputs. They must have the same header as output. + for (const auto & output : {main_output, totals_output, extremes_output}) + { + inputs.emplace_back(outputs.front().getHeader(), this); + /// Connect checks that header is same for ports. + connect(*output, inputs.back()); + inputs.back().setNeeded(); + } + + /// Executor will check that all processors are connected. + return std::move(processors); +} + +Pipe createDelayedPipe(const Block & header, DelayedSource::Creator processors_creator) +{ + auto source = std::make_shared(header, std::move(processors_creator)); + + Pipe pipe(&source->getPort(DelayedSource::Main)); + pipe.setTotalsPort(&source->getPort(DelayedSource::Totals)); + pipe.setExtremesPort(&source->getPort(DelayedSource::Extremes)); + + pipe.addProcessors({std::move(source)}); + return pipe; +} + +} diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h new file mode 100644 index 00000000000..31ec1e054fe --- /dev/null +++ b/src/Processors/Sources/DelayedSource.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// DelayedSource delays pipeline calculation until it starts execution. +/// It accepts callback which creates a new pipe. +/// +/// First time when DelayedSource's main output port needs data, callback is called. +/// Then, DelayedSource expands pipeline: adds new inputs and connects pipe with it. +/// Then, DelayedSource just move data from inputs to outputs until finished. +/// +/// It main output port of DelayedSource is never needed, callback won't be called. 
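///
/// A minimal usage sketch (illustrative only; `buildSomePipe` is a hypothetical
/// factory returning a Pipe and is not part of this patch):
///
///     Pipe pipe = createDelayedPipe(header, [=]() -> Pipe
///     {
///         /// The callback runs only when the main output is first asked for data.
///         return buildSomePipe();
///     });
///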
+class DelayedSource : public IProcessor +{ +public: + using Creator = std::function; + + DelayedSource(const Block & header, Creator processors_creator); + String getName() const override { return "Delayed"; } + + Status prepare() override; + void work() override; + Processors expandPipeline() override; + + enum PortKind { Main = 0, Totals = 1, Extremes = 2 }; + OutputPort & getPort(PortKind kind) { return *std::next(outputs.begin(), kind); } + +private: + Creator creator; + Processors processors; + + /// Outputs from returned pipe. + OutputPort * main_output = nullptr; + OutputPort * totals_output = nullptr; + OutputPort * extremes_output = nullptr; +}; + +/// Creates pipe from DelayedSource. +Pipe createDelayedPipe(const Block & header, DelayedSource::Creator processors_creator); + +} diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp new file mode 100644 index 00000000000..2f76e0c87d4 --- /dev/null +++ b/src/Processors/Sources/RemoteSource.cpp @@ -0,0 +1,132 @@ +#include +#include +#include +#include + +namespace DB +{ + +RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_) + : SourceWithProgress(executor->getHeader(), false) + , add_aggregation_info(add_aggregation_info_), query_executor(std::move(executor)) +{ + /// Add AggregatedChunkInfo if we expect DataTypeAggregateFunction as a result. + const auto & sample = getPort().getHeader(); + for (auto & type : sample.getDataTypes()) + if (typeid_cast(type.get())) + add_aggregation_info = true; +} + +RemoteSource::~RemoteSource() = default; + +Chunk RemoteSource::generate() +{ + if (!was_query_sent) + { + /// Progress method will be called on Progress packet. + query_executor->setProgressCallback([this](const Progress & value) { progress(value); }); + + /// Get rows_before_limit result for remote query from ProfileInfo packet. 
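    /// (The counter is the one distributed by QueryPipeline::initRowsBeforeLimit(), which now
    /// also registers RemoteSource processors, so rows reported by remote servers are reflected
    /// in rows_before_limit_at_least; see RowsBeforeLimitCounter::set() added in this change.)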
+ query_executor->setProfileInfoCallback([this](const BlockStreamProfileInfo & info) + { + if (rows_before_limit && info.hasAppliedLimit()) + rows_before_limit->set(info.getRowsBeforeLimit()); + }); + + query_executor->sendQuery(); + + was_query_sent = true; + } + + auto block = query_executor->read(); + + if (!block) + { + query_executor->finish(); + return {}; + } + + UInt64 num_rows = block.rows(); + Chunk chunk(block.getColumns(), num_rows); + + if (add_aggregation_info) + { + auto info = std::make_shared(); + info->bucket_num = block.info.bucket_num; + info->is_overflows = block.info.is_overflows; + chunk.setChunkInfo(std::move(info)); + } + + return chunk; +} + +void RemoteSource::onCancel() +{ + query_executor->cancel(); +} + + +RemoteTotalsSource::RemoteTotalsSource(RemoteQueryExecutorPtr executor) + : ISource(executor->getHeader()) + , query_executor(std::move(executor)) +{ +} + +RemoteTotalsSource::~RemoteTotalsSource() = default; + +Chunk RemoteTotalsSource::generate() +{ + if (auto block = query_executor->getTotals()) + { + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); + } + + return {}; +} + + +RemoteExtremesSource::RemoteExtremesSource(RemoteQueryExecutorPtr executor) + : ISource(executor->getHeader()) + , query_executor(std::move(executor)) +{ +} + +RemoteExtremesSource::~RemoteExtremesSource() = default; + +Chunk RemoteExtremesSource::generate() +{ + if (auto block = query_executor->getExtremes()) + { + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); + } + + return {}; +} + + +Pipe createRemoteSourcePipe( + RemoteQueryExecutorPtr query_executor, + bool add_aggregation_info, bool add_totals, bool add_extremes) +{ + Pipe pipe(std::make_shared(query_executor, add_aggregation_info)); + + if (add_totals) + { + auto totals_source = std::make_shared(query_executor); + pipe.setTotalsPort(&totals_source->getPort()); + pipe.addProcessors({std::move(totals_source)}); + } + + if (add_extremes) + { + auto extremes_source = std::make_shared(query_executor); + pipe.setExtremesPort(&extremes_source->getPort()); + pipe.addProcessors({std::move(extremes_source)}); + } + + return pipe; +} + +} diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h new file mode 100644 index 00000000000..0b4405a0905 --- /dev/null +++ b/src/Processors/Sources/RemoteSource.h @@ -0,0 +1,82 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class RemoteQueryExecutor; +using RemoteQueryExecutorPtr = std::shared_ptr; + +/// Source from RemoteQueryExecutor. Executes remote query and returns query result chunks. +class RemoteSource : public SourceWithProgress +{ +public: + /// Flag add_aggregation_info tells if AggregatedChunkInfo should be added to result chunk. + /// AggregatedChunkInfo stores the bucket number used for two-level aggregation. + /// This flag should be typically enabled for queries with GROUP BY which are executed till WithMergeableState. + RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_); + ~RemoteSource() override; + + String getName() const override { return "Remote"; } + + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } + + /// Stop reading from stream if output port is finished. 
+ void onUpdatePorts() override + { + if (getPort().isFinished()) + cancel(); + } + +protected: + Chunk generate() override; + void onCancel() override; + +private: + bool was_query_sent = false; + bool add_aggregation_info = false; + RemoteQueryExecutorPtr query_executor; + RowsBeforeLimitCounterPtr rows_before_limit; +}; + +/// Totals source from RemoteQueryExecutor. +class RemoteTotalsSource : public ISource +{ +public: + explicit RemoteTotalsSource(RemoteQueryExecutorPtr executor); + ~RemoteTotalsSource() override; + + String getName() const override { return "RemoteTotals"; } + +protected: + Chunk generate() override; + +private: + RemoteQueryExecutorPtr query_executor; +}; + +/// Extremes source from RemoteQueryExecutor. +class RemoteExtremesSource : public ISource +{ +public: + explicit RemoteExtremesSource(RemoteQueryExecutorPtr executor); + ~RemoteExtremesSource() override; + + String getName() const override { return "RemoteExtremes"; } + +protected: + Chunk generate() override; + +private: + RemoteQueryExecutorPtr query_executor; +}; + +/// Create pipe with remote sources. +Pipe createRemoteSourcePipe( + RemoteQueryExecutorPtr query_executor, + bool add_aggregation_info, bool add_totals, bool add_extremes); + +} diff --git a/src/Processors/Sources/SourceWithProgress.cpp b/src/Processors/Sources/SourceWithProgress.cpp index 8d7a0a3d946..6488289d5ce 100644 --- a/src/Processors/Sources/SourceWithProgress.cpp +++ b/src/Processors/Sources/SourceWithProgress.cpp @@ -12,6 +12,11 @@ namespace ErrorCodes extern const int TOO_MANY_BYTES; } +SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress) + : ISourceWithProgress(header), auto_progress(enable_auto_progress) +{ +} + void SourceWithProgress::work() { if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode)) @@ -24,7 +29,7 @@ void SourceWithProgress::work() ISourceWithProgress::work(); - if (!was_progress_called && has_input) + if (auto_progress && !was_progress_called && has_input) progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() }); } } diff --git a/src/Processors/Sources/SourceWithProgress.h b/src/Processors/Sources/SourceWithProgress.h index 4778c50e49d..34810045143 100644 --- a/src/Processors/Sources/SourceWithProgress.h +++ b/src/Processors/Sources/SourceWithProgress.h @@ -44,6 +44,8 @@ class SourceWithProgress : public ISourceWithProgress { public: using ISourceWithProgress::ISourceWithProgress; + /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk. + SourceWithProgress(Block header, bool enable_auto_progress); using LocalLimits = IBlockInputStream::LocalLimits; using LimitsMode = IBlockInputStream::LimitsMode; @@ -76,6 +78,9 @@ private: /// This flag checks if progress() was manually called at generate() call. /// If not, it will be called for chunk after generate() was finished. bool was_progress_called = false; + + /// If enabled, progress() will be automatically called on each generated chunk. 
+ bool auto_progress = true; }; } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp new file mode 100644 index 00000000000..3cac1c9602c --- /dev/null +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -0,0 +1,244 @@ +#include +#include + +namespace DB +{ + +AggregatingInOrderTransform::AggregatingInOrderTransform( + Block header, AggregatingTransformParamsPtr params_, + const SortDescription & group_by_description_, size_t res_block_size_) + : AggregatingInOrderTransform(std::move(header), std::move(params_) + , group_by_description_, res_block_size_, std::make_unique(1), 0) +{ +} + +AggregatingInOrderTransform::AggregatingInOrderTransform( + Block header, AggregatingTransformParamsPtr params_, + const SortDescription & group_by_description_, size_t res_block_size_, + ManyAggregatedDataPtr many_data_, size_t current_variant) + : IProcessor({std::move(header)}, {params_->getCustomHeader(false)}) + , res_block_size(res_block_size_) + , params(std::move(params_)) + , group_by_description(group_by_description_) + , aggregate_columns(params->params.aggregates_size) + , many_data(std::move(many_data_)) + , variants(*many_data->variants[current_variant]) +{ + /// We won't finalize states in order to merge same states (generated due to multi-thread execution) in AggregatingSortedTransform + res_header = params->getCustomHeader(false); + + /// Replace column names to column position in description_sorted. + for (auto & column_description : group_by_description) + { + if (!column_description.column_name.empty()) + { + column_description.column_number = res_header.getPositionByName(column_description.column_name); + column_description.column_name.clear(); + } + } +} + +AggregatingInOrderTransform::~AggregatingInOrderTransform() = default; + +static bool less(const MutableColumns & lhs, const Columns & rhs, size_t i, size_t j, const SortDescription & descr) +{ + for (const auto & elem : descr) + { + size_t ind = elem.column_number; + int res = elem.direction * lhs[ind]->compareAt(i, j, *rhs[ind], elem.nulls_direction); + if (res < 0) + return true; + else if (res > 0) + return false; + } + return false; +} + + +void AggregatingInOrderTransform::consume(Chunk chunk) +{ + size_t rows = chunk.getNumRows(); + if (rows == 0) + return; + + if (!is_consume_started) + { + LOG_TRACE(log, "Aggregating in order"); + is_consume_started = true; + } + src_rows += rows; + src_bytes += chunk.bytes(); + + Columns materialized_columns; + Columns key_columns(params->params.keys_size); + for (size_t i = 0; i < params->params.keys_size; ++i) + { + materialized_columns.push_back(chunk.getColumns().at(params->params.keys[i])->convertToFullColumnIfConst()); + key_columns[i] = materialized_columns.back(); + } + + Aggregator::NestedColumnsHolder nested_columns_holder; + Aggregator::AggregateFunctionInstructions aggregate_function_instructions; + params->aggregator.prepareAggregateInstructions(chunk.getColumns(), aggregate_columns, materialized_columns, aggregate_function_instructions, nested_columns_holder); + + size_t key_end = 0; + size_t key_begin = 0; + /// If we don't have a block we create it and fill with first key + if (!cur_block_size) + { + res_key_columns.resize(params->params.keys_size); + res_aggregate_columns.resize(params->params.aggregates_size); + + for (size_t i = 0; i < params->params.keys_size; ++i) + { + res_key_columns[i] = res_header.safeGetByPosition(i).type->createColumn(); + } + for (size_t i = 0; i < 
params->params.aggregates_size; ++i) + { + res_aggregate_columns[i] = res_header.safeGetByPosition(i + params->params.keys_size).type->createColumn(); + } + params->aggregator.createStatesAndFillKeyColumnsWithSingleKey(variants, key_columns, key_begin, res_key_columns); + ++cur_block_size; + } + size_t mid = 0; + size_t high = 0; + size_t low = -1; + /// Will split block into segments with the same key + while (key_end != rows) + { + high = rows; + /// Find the first position of new (not current) key in current chunk + while (high - low > 1) + { + mid = (low + high) / 2; + if (!less(res_key_columns, key_columns, cur_block_size - 1, mid, group_by_description)) + low = mid; + else + high = mid; + } + key_end = high; + /// Add data to aggr. state if interval is not empty. Empty when haven't found current key in new block. + if (key_begin != key_end) + { + params->aggregator.executeOnIntervalWithoutKeyImpl(variants.without_key, key_begin, key_end, aggregate_function_instructions.data(), variants.aggregates_pool); + } + + low = key_begin = key_end; + /// We finalize last key aggregation state if a new key found. + if (key_begin != rows) + { + params->aggregator.fillAggregateColumnsWithSingleKey(variants, res_aggregate_columns); + /// If res_block_size is reached we have to stop consuming and generate the block. Save the extra rows into new chunk. + if (cur_block_size == res_block_size) + { + Columns source_columns = chunk.detachColumns(); + + for (auto & source_column : source_columns) + source_column = source_column->cut(key_begin, rows - key_begin); + + current_chunk = Chunk(source_columns, rows - key_begin); + block_end_reached = true; + need_generate = true; + cur_block_size = 0; + return; + } + + /// We create a new state for the new key and update res_key_columns + params->aggregator.createStatesAndFillKeyColumnsWithSingleKey(variants, key_columns, key_begin, res_key_columns); + ++cur_block_size; + } + } + block_end_reached = false; +} + + +void AggregatingInOrderTransform::work() +{ + if (is_consume_finished || need_generate) + { + generate(); + } + else + { + consume(std::move(current_chunk)); + } +} + + +IProcessor::Status AggregatingInOrderTransform::prepare() +{ + auto & output = outputs.front(); + auto & input = inputs.back(); + + /// Check can output. + if (output.isFinished()) + { + input.close(); + return Status::Finished; + } + + if (!output.canPush()) + { + input.setNotNeeded(); + return Status::PortFull; + } + + if (block_end_reached) + { + if (need_generate) + { + return Status::Ready; + } + else + { + output.push(std::move(to_push_chunk)); + return Status::Ready; + } + } + else + { + if (is_consume_finished) + { + output.push(std::move(to_push_chunk)); + output.finish(); + LOG_TRACE(log, "Aggregated. 
{} to {} rows (from {})", src_rows, res_rows, + formatReadableSizeWithBinarySuffix(src_bytes)); + return Status::Finished; + } + if (input.isFinished()) + { + is_consume_finished = true; + return Status::Ready; + } + } + if (!input.hasData()) + { + input.setNeeded(); + return Status::NeedData; + } + current_chunk = input.pull(!is_consume_finished); + return Status::Ready; +} + +void AggregatingInOrderTransform::generate() +{ + if (cur_block_size && is_consume_finished) + params->aggregator.fillAggregateColumnsWithSingleKey(variants, res_aggregate_columns); + + Block res = res_header.cloneEmpty(); + + for (size_t i = 0; i < res_key_columns.size(); ++i) + { + res.getByPosition(i).column = std::move(res_key_columns[i]); + } + for (size_t i = 0; i < res_aggregate_columns.size(); ++i) + { + res.getByPosition(i + res_key_columns.size()).column = std::move(res_aggregate_columns[i]); + } + to_push_chunk = convertToChunk(res); + res_rows += to_push_chunk.getNumRows(); + need_generate = false; +} + + +} diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h new file mode 100644 index 00000000000..10793e885ce --- /dev/null +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class AggregatingInOrderTransform : public IProcessor +{ + +public: + AggregatingInOrderTransform(Block header, AggregatingTransformParamsPtr params, + const SortDescription & group_by_description, size_t res_block_size, + ManyAggregatedDataPtr many_data, size_t current_variant); + + AggregatingInOrderTransform(Block header, AggregatingTransformParamsPtr params, + const SortDescription & group_by_description, size_t res_block_size); + + ~AggregatingInOrderTransform() override; + + String getName() const override { return "AggregatingInOrderTransform"; } + + Status prepare() override; + + void work() override; + + void consume(Chunk chunk); + +private: + void generate(); + + size_t res_block_size; + size_t cur_block_size = 0; + + MutableColumns res_key_columns; + MutableColumns res_aggregate_columns; + + AggregatingTransformParamsPtr params; + SortDescription group_by_description; + + Aggregator::AggregateColumns aggregate_columns; + + ManyAggregatedDataPtr many_data; + AggregatedDataVariants & variants; + + UInt64 src_rows = 0; + UInt64 src_bytes = 0; + UInt64 res_rows = 0; + + bool need_generate = false; + bool block_end_reached = false; + bool is_consume_started = false; + bool is_consume_finished = false; + + Block res_header; + Chunk current_chunk; + Chunk to_push_chunk; + + Poco::Logger * log = &Poco::Logger::get("AggregatingInOrderTransform"); +}; + + +class FinalizingSimpleTransform : public ISimpleTransform +{ +public: + FinalizingSimpleTransform(Block header, AggregatingTransformParamsPtr params_) + : ISimpleTransform({std::move(header)}, {params_->getHeader()}, true) + , params(params_) {} + + void transform(Chunk & chunk) override + { + if (params->final) + finalizeChunk(chunk); + else if (!chunk.getChunkInfo()) + { + auto info = std::make_shared(); + chunk.setChunkInfo(std::move(info)); + } + } + + String getName() const override { return "FinalizingSimpleTransform"; } + +private: + AggregatingTransformParamsPtr params; +}; + + +} diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 8be353b4d7a..c5be62e276a 100644 --- 
a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -19,23 +19,23 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +/// Convert block to chunk. +/// Adds additional info about aggregation. +Chunk convertToChunk(const Block & block) +{ + auto info = std::make_shared(); + info->bucket_num = block.info.bucket_num; + info->is_overflows = block.info.is_overflows; + + UInt64 num_rows = block.rows(); + Chunk chunk(block.getColumns(), num_rows); + chunk.setChunkInfo(std::move(info)); + + return chunk; +} + namespace { - /// Convert block to chunk. - /// Adds additional info about aggregation. - Chunk convertToChunk(const Block & block) - { - auto info = std::make_shared(); - info->bucket_num = block.info.bucket_num; - info->is_overflows = block.info.is_overflows; - - UInt64 num_rows = block.rows(); - Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); - - return chunk; - } - const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { const auto & info = chunk.getChunkInfo(); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index c2693579c67..235d01ebc77 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -28,6 +28,8 @@ struct AggregatingTransformParams : params(params_), aggregator(params), final(final_) {} Block getHeader() const { return aggregator.getHeader(final); } + + Block getCustomHeader(bool final_) const { return aggregator.getHeader(final_); } }; struct ManyAggregatedData @@ -117,4 +119,6 @@ private: void initGenerate(); }; +Chunk convertToChunk(const Block & block); + } diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index 4c904eb95a1..b58b008339d 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -112,7 +112,7 @@ void FinishSortingTransform::consume(Chunk chunk) } } - /// If we reach here, that means that current cunk is first in portion + /// If we reach here, that means that current chunk is first in portion /// or it all consists of rows with the same key as tail of a previous chunk. 
chunks.push_back(std::move(chunk)); } diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index b6069da66f3..f16b333ffd4 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -1,5 +1,6 @@ -#include +#pragma once +#include #include namespace DB diff --git a/src/Processors/tests/CMakeLists.txt b/src/Processors/tests/CMakeLists.txt index 4ddb6c68416..e69de29bb2d 100644 --- a/src/Processors/tests/CMakeLists.txt +++ b/src/Processors/tests/CMakeLists.txt @@ -1,15 +0,0 @@ -add_executable (processors_test processors_test.cpp) -add_executable (processors_test_chain processors_test_chain.cpp) -add_executable (processors_test_merge processors_test_merge.cpp) -add_executable (processors_test_merging_sorted_transform processors_test_merging_sorted_transform.cpp) -add_executable (processors_test_merge_sorting_transform processors_test_merge_sorting_transform.cpp) -add_executable (processors_test_expand_pipeline processors_test_expand_pipeline.cpp) -add_executable (processors_test_aggregation processors_test_aggregation.cpp) - -target_link_libraries (processors_test PRIVATE dbms) -target_link_libraries (processors_test_chain PRIVATE dbms) -target_link_libraries (processors_test_merge PRIVATE dbms) -target_link_libraries (processors_test_expand_pipeline PRIVATE dbms) -target_link_libraries (processors_test_merging_sorted_transform PRIVATE dbms) -target_link_libraries (processors_test_merge_sorting_transform PRIVATE dbms) -target_link_libraries (processors_test_aggregation PRIVATE dbms clickhouse_aggregate_functions) diff --git a/src/Processors/tests/processors_test.cpp b/src/Processors/tests/processors_test.cpp deleted file mode 100644 index 3c73223e59c..00000000000 --- a/src/Processors/tests/processors_test.cpp +++ /dev/null @@ -1,228 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -using namespace DB; - - -class NumbersSource : public ISource -{ -public: - String getName() const override { return "Numbers"; } - - NumbersSource(UInt64 start_number, unsigned sleep_useconds_) - : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - current_number(start_number), sleep_useconds(sleep_useconds_) - { - } - -private: - UInt64 current_number = 0; - unsigned sleep_useconds; - - Chunk generate() override - { - usleep(sleep_useconds); - - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create(1, current_number)); - ++current_number; - return Chunk(std::move(columns), 1); - } -}; - - -class SleepyNumbersSource : public IProcessor -{ -protected: - OutputPort & output; - -public: - String getName() const override { return "SleepyNumbers"; } - - SleepyNumbersSource(UInt64 start_number, unsigned sleep_useconds_) - : IProcessor({}, {Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})}) - , output(outputs.front()), current_number(start_number), sleep_useconds(sleep_useconds_) - { - } - - Status prepare() override - { - if (active) - return Status::Wait; - - if (output.isFinished()) - return Status::Finished; - - if (!output.canPush()) - return Status::PortFull; - - if (!current_chunk) - return Status::Async; - - output.push(std::move(current_chunk)); - return Status::Async; - } - - void schedule(EventCounter & watch) override - { - active 
= true; - pool.scheduleOrThrowOnError([&watch, this] - { - usleep(sleep_useconds); - current_chunk = generate(); - active = false; - watch.notify(); - }); - } - - OutputPort & getPort() { return output; } - -private: - ThreadPool pool{1, 1, 0}; - Chunk current_chunk; - std::atomic_bool active {false}; - - UInt64 current_number = 0; - unsigned sleep_useconds; - - Chunk generate() - { - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create(1, current_number)); - ++current_number; - return Chunk(std::move(columns), 1); - } -}; - - -class PrintSink : public ISink -{ -public: - String getName() const override { return "Print"; } - - explicit PrintSink(String prefix_) - : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - prefix(std::move(prefix_)) - { - } - -private: - String prefix; - WriteBufferFromFileDescriptor out{STDOUT_FILENO}; - FormatSettings settings; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - size_t columns = chunk.getNumColumns(); - - for (size_t row_num = 0; row_num < rows; ++row_num) - { - writeString(prefix, out); - for (size_t column_num = 0; column_num < columns; ++column_num) - { - if (column_num != 0) - writeChar('\t', out); - getPort().getHeader().getByPosition(column_num).type->serializeAsText(*chunk.getColumns()[column_num], row_num, out, settings); - } - writeChar('\n', out); - } - - out.next(); - } -}; - - -int main(int, char **) -try -{ - auto source0 = std::make_shared(0, 300000); - auto header = source0->getPort().getHeader(); - auto limit0 = std::make_shared(header, 10, 0); - - connect(source0->getPort(), limit0->getInputPort()); - - auto queue = std::make_shared(header); - - connect(limit0->getOutputPort(), queue->getInputPort()); - - auto source1 = std::make_shared(100, 100000); - auto source2 = std::make_shared(1000, 200000); - - auto source3 = std::make_shared(10, 100000); - auto limit3 = std::make_shared(header, 5, 0); - - connect(source3->getPort(), limit3->getInputPort()); - - auto source4 = std::make_shared(10, 100000); - auto limit4 = std::make_shared(header, 5, 0); - - connect(source4->getPort(), limit4->getInputPort()); - - auto concat = std::make_shared(header, 2); - - connect(limit3->getOutputPort(), concat->getInputs().front()); - connect(limit4->getOutputPort(), concat->getInputs().back()); - - auto fork = std::make_shared(header, 2); - - connect(concat->getOutputPort(), fork->getInputPort()); - - auto print_after_concat = std::make_shared("---------- "); - - connect(fork->getOutputs().back(), print_after_concat->getPort()); - - auto resize = std::make_shared(header, 4, 1); - - auto input_it = resize->getInputs().begin(); - connect(queue->getOutputPort(), *(input_it++)); - connect(source1->getPort(), *(input_it++)); - connect(source2->getPort(), *(input_it++)); - connect(fork->getOutputs().front(), *(input_it++)); - - auto limit = std::make_shared(header, 100, 0); - - connect(resize->getOutputs().front(), limit->getInputPort()); - - auto sink = std::make_shared(""); - - connect(limit->getOutputPort(), sink->getPort()); - - WriteBufferFromOStream out(std::cout); - std::vector processors = {source0, source1, source2, source3, source4, limit0, limit3, limit4, limit, - queue, concat, fork, print_after_concat, resize, sink}; - printPipeline(processors, out); - - // ThreadPool pool(4, 4, 10); - PipelineExecutor executor(processors); - /// SequentialPipelineExecutor executor({sink}); - - executor.execute(1); - - return 0; -} -catch (...) 
-{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/src/Processors/tests/processors_test_aggregation.cpp b/src/Processors/tests/processors_test_aggregation.cpp deleted file mode 100644 index 9b8bee67d52..00000000000 --- a/src/Processors/tests/processors_test_aggregation.cpp +++ /dev/null @@ -1,411 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -using namespace DB; - -namespace DB::ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -class NumbersSource : public ISource -{ -public: - String getName() const override { return "Numbers"; } - - NumbersSource(UInt64 start_number, UInt64 step_, UInt64 block_size_, unsigned sleep_useconds_) - : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - current_number(start_number), step(step_), block_size(block_size_), sleep_useconds(sleep_useconds_) - { - } - -private: - UInt64 current_number = 0; - UInt64 step; - UInt64 block_size; - unsigned sleep_useconds; - - Chunk generate() override - { - usleep(sleep_useconds); - - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create()); - - for (UInt64 i = 0; i < block_size; ++i, current_number += step) - columns.back()->insert(Field(current_number)); - - return Chunk(std::move(columns), block_size); - } -}; - -class PrintSink : public ISink -{ -public: - String getName() const override { return "Print"; } - - PrintSink(String prefix_, Block header) - : ISink(std::move(header)), - prefix(std::move(prefix_)) - { - } - -private: - String prefix; - WriteBufferFromFileDescriptor out{STDOUT_FILENO}; - FormatSettings settings; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - size_t columns = chunk.getNumColumns(); - - for (size_t row_num = 0; row_num < rows; ++row_num) - { - writeString(prefix, out); - for (size_t column_num = 0; column_num < columns; ++column_num) - { - if (column_num != 0) - writeChar('\t', out); - getPort().getHeader().getByPosition(column_num).type->serializeAsText(*chunk.getColumns()[column_num], row_num, out, settings); - } - writeChar('\n', out); - } - - out.next(); - } -}; - -class CheckSink : public ISink -{ -public: - String getName() const override { return "Check"; } - - CheckSink(Block header, size_t num_rows) - : ISink(std::move(header)), read_rows(num_rows, false) - { - } - - void checkAllRead() - { - for (size_t i = 0; i < read_rows.size(); ++i) - { - if (!read_rows[i]) - { - throw Exception("Check Failed. Row " + toString(i) + " was not read.", ErrorCodes::LOGICAL_ERROR); - } - } - } - -private: - std::vector read_rows; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - size_t columns = chunk.getNumColumns(); - - for (size_t row_num = 0; row_num < rows; ++row_num) - { - std::vector values(columns); - for (size_t column_num = 0; column_num < columns; ++column_num) - { - values[column_num] = chunk.getColumns()[column_num]->getUInt(row_num); - } - - if (values.size() >= 2 && 3 * values[0] != values[1]) - throw Exception("Check Failed. 
Got (" + toString(values[0]) + ", " + toString(values[1]) + ") in result," - + "but " + toString(values[0]) + " * 3 != " + toString(values[1]), - ErrorCodes::LOGICAL_ERROR); - - if (values[0] >= read_rows.size()) - throw Exception("Check Failed. Got string with number " + toString(values[0]) + - " (max " + toString(read_rows.size()), ErrorCodes::LOGICAL_ERROR); - - if (read_rows[values[0]]) - throw Exception("Row " + toString(values[0]) + " was already read.", ErrorCodes::LOGICAL_ERROR); - - read_rows[values[0]] = true; - } - } -}; - -template -struct Measure -{ - template - static typename TimeT::rep execution(F&& func, Args&&... args) - { - auto start = std::chrono::steady_clock::now(); - std::forward(func)(std::forward(args)...); - auto duration = std::chrono::duration_cast< TimeT> - (std::chrono::steady_clock::now() - start); - return duration.count(); - } -}; - -int main(int, char **) -try -{ - ThreadStatus thread_status; - CurrentThread::initializeQuery(); - auto thread_group = CurrentThread::getGroup(); - - Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); - Poco::Logger::root().setChannel(channel); - Poco::Logger::root().setLevel("trace"); - - registerAggregateFunctions(); - auto & factory = AggregateFunctionFactory::instance(); - - auto cur_path = Poco::Path().absolute().toString(); - auto disk = std::make_shared("tmp", cur_path, 0); - auto tmp_volume = std::make_shared("tmp", std::vector{disk}, 0); - - auto execute_one_stream = [&](String msg, size_t num_threads, bool two_level, bool external) - { - std::cerr << '\n' << msg << "\n"; - - size_t num_rows = 1000000; - size_t block_size = 1000; - - auto source1 = std::make_shared(0, 1, block_size, 0); - auto source2 = std::make_shared(0, 1, block_size, 0); - auto source3 = std::make_shared(0, 1, block_size, 0); - - auto limit1 = std::make_shared(source1->getPort().getHeader(), num_rows, 0); - auto limit2 = std::make_shared(source2->getPort().getHeader(), num_rows, 0); - auto limit3 = std::make_shared(source3->getPort().getHeader(), num_rows, 0); - - auto resize = std::make_shared(source1->getPort().getHeader(), 3, 1); - - AggregateDescriptions aggregate_descriptions(1); - - DataTypes sum_types = { std::make_shared() }; - aggregate_descriptions[0].function = factory.get("sum", sum_types); - aggregate_descriptions[0].arguments = {0}; - - bool overflow_row = false; /// Without overflow row. - size_t max_rows_to_group_by = 0; /// All. - size_t group_by_two_level_threshold = two_level ? 10 : 0; - size_t group_by_two_level_threshold_bytes = two_level ? 128 : 0; - size_t max_bytes_before_external_group_by = external ? 
10000000 : 0; - - Aggregator::Params params( - source1->getPort().getHeader(), - {0}, - aggregate_descriptions, - overflow_row, - max_rows_to_group_by, - OverflowMode::THROW, - group_by_two_level_threshold, - group_by_two_level_threshold_bytes, - max_bytes_before_external_group_by, - false, /// empty_result_for_aggregation_by_empty_set - tmp_volume, - 1, /// max_threads - 0 - ); - - auto agg_params = std::make_shared(params, /* final =*/ false); - auto merge_params = std::make_shared(params, /* final =*/ true); - auto aggregating = std::make_shared(source1->getPort().getHeader(), agg_params); - auto merging = std::make_shared(aggregating->getOutputs().front().getHeader(), merge_params, 4); - auto sink = std::make_shared(merging->getOutputPort().getHeader(), num_rows); - - connect(source1->getPort(), limit1->getInputPort()); - connect(source2->getPort(), limit2->getInputPort()); - connect(source3->getPort(), limit3->getInputPort()); - - auto it = resize->getInputs().begin(); - connect(limit1->getOutputPort(), *(it++)); - connect(limit2->getOutputPort(), *(it++)); - connect(limit3->getOutputPort(), *(it++)); - - connect(resize->getOutputs().front(), aggregating->getInputs().front()); - connect(aggregating->getOutputs().front(), merging->getInputPort()); - connect(merging->getOutputPort(), sink->getPort()); - - std::vector processors = {source1, source2, source3, - limit1, limit2, limit3, - resize, aggregating, merging, sink}; -// WriteBufferFromOStream out(std::cout); -// printPipeline(processors, out); - - PipelineExecutor executor(processors); - executor.execute(num_threads); - sink->checkAllRead(); - }; - - auto execute_mult_streams = [&](String msg, size_t num_threads, bool two_level, bool external) - { - std::cerr << '\n' << msg << "\n"; - - size_t num_rows = 1000000; - size_t block_size = 1000; - - auto source1 = std::make_shared(0, 1, block_size, 0); - auto source2 = std::make_shared(0, 1, block_size, 0); - auto source3 = std::make_shared(0, 1, block_size, 0); - - auto limit1 = std::make_shared(source1->getPort().getHeader(), num_rows, 0); - auto limit2 = std::make_shared(source2->getPort().getHeader(), num_rows, 0); - auto limit3 = std::make_shared(source3->getPort().getHeader(), num_rows, 0); - - AggregateDescriptions aggregate_descriptions(1); - - DataTypes sum_types = { std::make_shared() }; - aggregate_descriptions[0].function = factory.get("sum", sum_types); - aggregate_descriptions[0].arguments = {0}; - - bool overflow_row = false; /// Without overflow row. - size_t max_rows_to_group_by = 0; /// All. - size_t group_by_two_level_threshold = two_level ? 10 : 0; - size_t group_by_two_level_threshold_bytes = two_level ? 128 : 0; - size_t max_bytes_before_external_group_by = external ? 
10000000 : 0; - - Aggregator::Params params( - source1->getPort().getHeader(), - {0}, - aggregate_descriptions, - overflow_row, - max_rows_to_group_by, - OverflowMode::THROW, - group_by_two_level_threshold, - group_by_two_level_threshold_bytes, - max_bytes_before_external_group_by, - false, /// empty_result_for_aggregation_by_empty_set - tmp_volume, - 1, /// max_threads - 0 - ); - - auto agg_params = std::make_shared(params, /* final =*/ false); - auto merge_params = std::make_shared(params, /* final =*/ true); - - ManyAggregatedDataPtr data = std::make_unique(3); - - auto aggregating1 = std::make_shared(source1->getPort().getHeader(), agg_params, data, 0, 4, 4); - auto aggregating2 = std::make_shared(source1->getPort().getHeader(), agg_params, data, 1, 4, 4); - auto aggregating3 = std::make_shared(source1->getPort().getHeader(), agg_params, data, 2, 4, 4); - - Processors merging_pipe = createMergingAggregatedMemoryEfficientPipe( - aggregating1->getOutputs().front().getHeader(), - merge_params, - 3, 2); - - auto sink = std::make_shared(merging_pipe.back()->getOutputs().back().getHeader(), num_rows); - - connect(source1->getPort(), limit1->getInputPort()); - connect(source2->getPort(), limit2->getInputPort()); - connect(source3->getPort(), limit3->getInputPort()); - - connect(limit1->getOutputPort(), aggregating1->getInputs().front()); - connect(limit2->getOutputPort(), aggregating2->getInputs().front()); - connect(limit3->getOutputPort(), aggregating3->getInputs().front()); - - auto it = merging_pipe.front()->getInputs().begin(); - connect(aggregating1->getOutputs().front(), *(it++)); - connect(aggregating2->getOutputs().front(), *(it++)); - connect(aggregating3->getOutputs().front(), *(it++)); - - connect(merging_pipe.back()->getOutputs().back(), sink->getPort()); - - std::vector processors = {source1, source2, source3, - limit1, limit2, limit3, - aggregating1, aggregating2, aggregating3, sink}; - - processors.insert(processors.end(), merging_pipe.begin(), merging_pipe.end()); -// WriteBufferFromOStream out(std::cout); -// printPipeline(processors, out); - - PipelineExecutor executor(processors); - executor.execute(num_threads); - sink->checkAllRead(); - }; - - std::vector messages; - std::vector times; - - auto exec = [&](auto func, String msg, size_t num_threads, bool two_level, bool external) - { - msg += ", two_level = " + toString(two_level) + ", external = " + toString(external); - Int64 time = 0; - - auto wrapper = [&]() - { - ThreadStatus cur_status; - - CurrentThread::attachToIfDetached(thread_group); - time = Measure<>::execution(func, msg, num_threads, two_level, external); - }; - - std::thread thread(wrapper); - thread.join(); - - messages.emplace_back(msg); - times.emplace_back(time); - }; - - size_t num_threads = 4; - - exec(execute_one_stream, "One stream, single thread", 1, false, false); - exec(execute_one_stream, "One stream, multiple threads", num_threads, false, false); - - exec(execute_mult_streams, "Multiple streams, single thread", 1, false, false); - exec(execute_mult_streams, "Multiple streams, multiple threads", num_threads, false, false); - - exec(execute_one_stream, "One stream, single thread", 1, true, false); - exec(execute_one_stream, "One stream, multiple threads", num_threads, true, false); - - exec(execute_mult_streams, "Multiple streams, single thread", 1, true, false); - exec(execute_mult_streams, "Multiple streams, multiple threads", num_threads, true, false); - - exec(execute_one_stream, "One stream, single thread", 1, true, true); - 
exec(execute_one_stream, "One stream, multiple threads", num_threads, true, true); - - exec(execute_mult_streams, "Multiple streams, single thread", 1, true, true); - exec(execute_mult_streams, "Multiple streams, multiple threads", num_threads, true, true); - - for (size_t i = 0; i < messages.size(); ++i) - std::cout << messages[i] << " time: " << times[i] << " ms.\n"; - - return 0; -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/src/Processors/tests/processors_test_chain.cpp b/src/Processors/tests/processors_test_chain.cpp deleted file mode 100644 index 0fbd52eef39..00000000000 --- a/src/Processors/tests/processors_test_chain.cpp +++ /dev/null @@ -1,165 +0,0 @@ -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include - - -using namespace DB; - - -class NumbersSource : public ISource -{ -public: - String getName() const override { return "Numbers"; } - - NumbersSource(UInt64 start_number, unsigned sleep_useconds_) - : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - current_number(start_number), sleep_useconds(sleep_useconds_) - { - } - -private: - UInt64 current_number = 0; - unsigned sleep_useconds; - - Chunk generate() override - { - usleep(sleep_useconds); - - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create(1, current_number)); - ++current_number; - return Chunk(std::move(columns), 1); - } -}; - -class SleepyTransform : public ISimpleTransform -{ -public: - explicit SleepyTransform(unsigned sleep_useconds_) - : ISimpleTransform( - Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }}), - Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }}), - /*skip_empty_chunks =*/ false) - , sleep_useconds(sleep_useconds_) {} - - String getName() const override { return "SleepyTransform"; } - -protected: - void transform(Chunk &) override - { - usleep(sleep_useconds); - } - -private: - unsigned sleep_useconds; -}; - -class PrintSink : public ISink -{ -public: - String getName() const override { return "Print"; } - - explicit PrintSink(String prefix_) - : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - prefix(std::move(prefix_)) - { - } - -private: - String prefix; - WriteBufferFromFileDescriptor out{STDOUT_FILENO}; - FormatSettings settings; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - size_t columns = chunk.getNumColumns(); - - for (size_t row_num = 0; row_num < rows; ++row_num) - { - writeString(prefix, out); - for (size_t column_num = 0; column_num < columns; ++column_num) - { - if (column_num != 0) - writeChar('\t', out); - getPort().getHeader().getByPosition(column_num).type->serializeAsText(*chunk.getColumns()[column_num], row_num, out, settings); - } - writeChar('\n', out); - } - - out.next(); - } -}; - -template -struct Measure -{ - template - static typename TimeT::rep execution(F&& func, Args&&... 
args) - { - auto start = std::chrono::steady_clock::now(); - std::forward(func)(std::forward(args)...); - auto duration = std::chrono::duration_cast< TimeT> - (std::chrono::steady_clock::now() - start); - return duration.count(); - } -}; - -int main(int, char **) -try -{ - auto execute_chain = [](size_t num_threads) - { - std::cerr << "---------------------\n"; - - auto source = std::make_shared(0, 100000); - auto transform1 = std::make_shared(100000); - auto transform2 = std::make_shared(100000); - auto transform3 = std::make_shared(100000); - auto limit = std::make_shared(source->getPort().getHeader(), 20, 0); - auto sink = std::make_shared(""); - - connect(source->getPort(), transform1->getInputPort()); - connect(transform1->getOutputPort(), transform2->getInputPort()); - connect(transform2->getOutputPort(), transform3->getInputPort()); - connect(transform3->getOutputPort(), limit->getInputPort()); - connect(limit->getOutputPort(), sink->getPort()); - - std::vector processors = {source, transform1, transform2, transform3, limit, sink}; -// WriteBufferFromOStream out(std::cout); -// printPipeline(processors, out); - - PipelineExecutor executor(processors); - executor.execute(num_threads); - }; - - auto time_single = Measure<>::execution(execute_chain, 1); - auto time_mt = Measure<>::execution(execute_chain, 4); - - std::cout << "Single Thread time: " << time_single << " ms.\n"; - std::cout << "Multiple Threads time: " << time_mt << " ms.\n"; - - return 0; -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/src/Processors/tests/processors_test_expand_pipeline.cpp b/src/Processors/tests/processors_test_expand_pipeline.cpp deleted file mode 100644 index 83ac2ed0168..00000000000 --- a/src/Processors/tests/processors_test_expand_pipeline.cpp +++ /dev/null @@ -1,285 +0,0 @@ -#include - -#include - -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include -#include -#include - -using namespace DB; - -class PrintSink : public ISink -{ -public: - String getName() const override { return "Print"; } - - explicit PrintSink(String prefix_) - : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - prefix(std::move(prefix_)) - { - } - -private: - String prefix; - WriteBufferFromFileDescriptor out{STDOUT_FILENO}; - FormatSettings settings; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - size_t columns = chunk.getNumColumns(); - - for (size_t row_num = 0; row_num < rows; ++row_num) - { - writeString(prefix, out); - for (size_t column_num = 0; column_num < columns; ++column_num) - { - if (column_num != 0) - writeChar('\t', out); - getPort().getHeader().getByPosition(column_num).type->serializeAsText(*chunk.getColumns()[column_num], row_num, out, settings); - } - writeChar('\n', out); - } - - out.next(); - } -}; - - -class OneNumberSource : public ISource -{ -public: - String getName() const override { return "OneNumber"; } - - explicit OneNumberSource(UInt64 number_) - : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - number(number_) - { - } - -private: - UInt64 number; - bool done = false; - - Chunk generate() override - { - if (done) - return Chunk(); - - done = true; - - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create(1, number)); - return Chunk(std::move(columns), 1); - } -}; - - -class ExpandingProcessor : public IProcessor -{ -public: - String getName() 
const override { return "Expanding"; } - ExpandingProcessor() - : IProcessor({Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})}, - {Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})}) - {} - - Status prepare() override - { - auto & main_input = inputs.front(); - auto & main_output = outputs.front(); - auto & additional_input = inputs.back(); - auto & additional_output = outputs.back(); - /// Check can output. - - - if (main_output.isFinished()) - { - main_input.close(); - additional_input.close(); - additional_output.finish(); - return Status::Finished; - } - - if (!main_output.canPush()) - { - main_input.setNotNeeded(); - additional_input.setNotNeeded(); - return Status::PortFull; - } - - if (chunk_from_add_inp && is_processed) - { - if (is_processed) - main_output.push(std::move(chunk_from_add_inp)); - else - return Status::Ready; - } - - if (expanded) - { - if (chunk_from_main_inp) - { - if (additional_output.isFinished()) - { - main_input.close(); - return Status::Finished; - } - - if (!additional_output.canPush()) - { - main_input.setNotNeeded(); - return Status::PortFull; - } - - additional_output.push(std::move(chunk_from_main_inp)); - main_input.close(); - } - - if (additional_input.isFinished()) - { - main_output.finish(); - return Status::Finished; - } - - additional_input.setNeeded(); - - if (!additional_input.hasData()) - return Status::NeedData; - - chunk_from_add_inp = additional_input.pull(); - is_processed = false; - return Status::Ready; - } - else - { - if (!chunk_from_main_inp) - { - - if (main_input.isFinished()) - { - main_output.finish(); - return Status::Finished; - } - - main_input.setNeeded(); - - if (!main_input.hasData()) - return Status::NeedData; - - chunk_from_main_inp = main_input.pull(); - main_input.close(); - } - - UInt64 val = chunk_from_main_inp.getColumns()[0]->getUInt(0); - if (val) - { - --val; - chunk_from_main_inp.setColumns(Columns{ColumnUInt64::create(1, val)}, 1); - return Status::ExpandPipeline; - } - - main_output.push(std::move(chunk_from_main_inp)); - main_output.finish(); - return Status::Finished; - } - } - - Processors expandPipeline() override - { - auto & main_input = inputs.front(); - auto & main_output = outputs.front(); - - Processors processors = {std::make_shared()}; - inputs.push_back({main_input.getHeader(), this}); - outputs.push_back({main_output.getHeader(), this}); - connect(outputs.back(), processors.back()->getInputs().front()); - connect(processors.back()->getOutputs().front(), inputs.back()); - inputs.back().setNeeded(); - - expanded = true; - return processors; - } - - void work() override - { - auto num_rows = chunk_from_add_inp.getNumRows(); - auto columns = chunk_from_add_inp.mutateColumns(); - columns.front()->insert(Field(num_rows)); - chunk_from_add_inp.setColumns(std::move(columns), num_rows + 1); - is_processed = true; - } - -private: - bool expanded = false; - Chunk chunk_from_main_inp; - Chunk chunk_from_add_inp; - bool is_processed = false; -}; - - -template -struct Measure -{ - template - static typename TimeT::rep execution(F&& func, Args&&... 
args) - { - auto start = std::chrono::steady_clock::now(); - std::forward(func)(std::forward(args)...); - auto duration = std::chrono::duration_cast< TimeT> - (std::chrono::steady_clock::now() - start); - return duration.count(); - } -}; - -int main(int, char **) -try -{ - auto execute = [](String msg, size_t num, size_t num_threads) - { - std::cerr << msg << "\n"; - - auto source = std::make_shared(num); - auto expanding = std::make_shared(); - auto sink = std::make_shared(""); - - connect(source->getPort(), expanding->getInputs().front()); - connect(expanding->getOutputs().front(), sink->getPort()); - - std::vector processors = {source, expanding, sink}; - - PipelineExecutor executor(processors); - executor.execute(num_threads); - - WriteBufferFromOStream out(std::cout); - printPipeline(executor.getProcessors(), out); - }; - - ThreadPool pool(4, 4, 10); - - auto time_single = Measure<>::execution(execute, "Single thread", 10, 1); - auto time_mt = Measure<>::execution(execute, "Multiple threads", 10, 4); - - std::cout << "Single Thread time: " << time_single << " ms.\n"; - std::cout << "Multiple Threads time:" << time_mt << " ms.\n"; - - return 0; -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/src/Processors/tests/processors_test_merge.cpp b/src/Processors/tests/processors_test_merge.cpp deleted file mode 100644 index 11b0bfd1365..00000000000 --- a/src/Processors/tests/processors_test_merge.cpp +++ /dev/null @@ -1,334 +0,0 @@ -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include - - -using namespace DB; - - -class MergingSortedProcessor : public IProcessor -{ -public: - MergingSortedProcessor(const Block & header, size_t num_inputs) - : IProcessor(InputPorts(num_inputs, header), OutputPorts{header}) - , chunks(num_inputs), positions(num_inputs, 0), finished(num_inputs, false) - { - } - - String getName() const override { return "MergingSortedProcessor"; } - - Status prepare() override - { - auto & output = outputs.front(); - - /// Check can output. - - if (output.isFinished()) - { - for (auto & in : inputs) - in.close(); - - return Status::Finished; - } - - if (!output.isNeeded()) - { - for (auto & in : inputs) - in.setNotNeeded(); - - return Status::PortFull; - } - - if (output.hasData()) - return Status::PortFull; - - /// Push if has data. - if (res) - { - output.push(std::move(res)); - return Status::PortFull; - } - - /// Check for inputs we need. 
- bool all_inputs_finished = true; - bool all_inputs_has_data = true; - auto it = inputs.begin(); - for (size_t i = 0; it != inputs.end(); ++it, ++i) - { - auto & input = *it; - if (!finished[i]) - { - if (!input.isFinished()) - { - all_inputs_finished = false; - bool needed = positions[i] >= chunks[i].getNumRows(); - if (needed) - { - input.setNeeded(); - if (input.hasData()) - { - chunks[i] = input.pull(); - positions[i] = 0; - } - else - all_inputs_has_data = false; - } - else - input.setNotNeeded(); - } - else - finished[i] = true; - } - } - - if (all_inputs_finished) - { - output.finish(); - return Status::Finished; - } - - if (!all_inputs_has_data) - return Status::NeedData; - - return Status::Ready; - } - - void work() override - { - using Key = std::pair; - std::priority_queue, std::greater<>> queue; - for (size_t i = 0; i < chunks.size(); ++i) - { - if (finished[i]) - continue; - - if (positions[i] >= chunks[i].getNumRows()) - return; - - queue.push({chunks[i].getColumns()[0]->getUInt(positions[i]), i}); - } - - auto col = ColumnUInt64::create(); - - while (!queue.empty()) - { - size_t ps = queue.top().second; - queue.pop(); - - const auto & cur_col = chunks[ps].getColumns()[0]; - col->insertFrom(*cur_col, positions[ps]); - ++positions[ps]; - - if (positions[ps] == cur_col->size()) - break; - - queue.push({cur_col->getUInt(positions[ps]), ps}); - } - - UInt64 num_rows = col->size(); - res.setColumns(Columns({std::move(col)}), num_rows); - } - - OutputPort & getOutputPort() { return outputs.front(); } - -private: - Chunks chunks; - Chunk res; - std::vector positions; - std::vector finished; -}; - - -class NumbersSource : public ISource -{ -public: - String getName() const override { return "Numbers"; } - - NumbersSource(UInt64 start_number, UInt64 step_, unsigned sleep_useconds_) - : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - current_number(start_number), step(step_), sleep_useconds(sleep_useconds_) - { - } - -private: - UInt64 current_number = 0; - UInt64 step; - unsigned sleep_useconds; - - Chunk generate() override - { - usleep(sleep_useconds); - - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create(1, current_number)); - current_number += step; - return Chunk(std::move(columns), 1); - } -}; - - -class SleepyTransform : public ISimpleTransform -{ -public: - explicit SleepyTransform(unsigned sleep_useconds_) - : ISimpleTransform( - Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }}), - Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }}), - false) - , sleep_useconds(sleep_useconds_) {} - - String getName() const override { return "SleepyTransform"; } - -protected: - void transform(Chunk &) override - { - usleep(sleep_useconds); - } - -private: - unsigned sleep_useconds; -}; - -class PrintSink : public ISink -{ -public: - String getName() const override { return "Print"; } - - explicit PrintSink(String prefix_) - : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - prefix(std::move(prefix_)) - { - } - -private: - String prefix; - WriteBufferFromFileDescriptor out{STDOUT_FILENO}; - FormatSettings settings; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - size_t columns = chunk.getNumColumns(); - - for (size_t row_num = 0; row_num < rows; ++row_num) - { - writeString(prefix, out); - for (size_t column_num = 0; column_num < columns; ++column_num) - { - if (column_num 
!= 0) - writeChar('\t', out); - getPort().getHeader().getByPosition(column_num).type->serializeAsText(*chunk.getColumns()[column_num], row_num, out, settings); - } - writeChar('\n', out); - } - - out.next(); - } -}; - -template -struct Measure -{ - template - static typename TimeT::rep execution(F&& func, Args&&... args) - { - auto start = std::chrono::steady_clock::now(); - std::forward(func)(std::forward(args)...); - auto duration = std::chrono::duration_cast< TimeT> - (std::chrono::steady_clock::now() - start); - return duration.count(); - } -}; - -int main(int, char **) -try -{ - auto execute_chain = [](String msg, size_t start1, size_t start2, size_t start3, size_t num_threads) - { - std::cerr << msg << "\n"; - - auto source1 = std::make_shared(start1, 3, 100000); - auto source2 = std::make_shared(start2, 3, 100000); - auto source3 = std::make_shared(start3, 3, 100000); - - auto transform1 = std::make_shared(100000); - auto transform2 = std::make_shared(100000); - auto transform3 = std::make_shared(100000); - - auto limit1 = std::make_shared(source1->getPort().getHeader(), 20, 0); - auto limit2 = std::make_shared(source2->getPort().getHeader(), 20, 0); - auto limit3 = std::make_shared(source3->getPort().getHeader(), 20, 0); - - auto merge = std::make_shared(source1->getPort().getHeader(), 3); - auto limit_fin = std::make_shared(source1->getPort().getHeader(), 54, 0); - auto sink = std::make_shared(""); - - connect(source1->getPort(), transform1->getInputPort()); - connect(source2->getPort(), transform2->getInputPort()); - connect(source3->getPort(), transform3->getInputPort()); - - connect(transform1->getOutputPort(), limit1->getInputPort()); - connect(transform2->getOutputPort(), limit2->getInputPort()); - connect(transform3->getOutputPort(), limit3->getInputPort()); - - auto it = merge->getInputs().begin(); - connect(limit1->getOutputPort(), *(it++)); - connect(limit2->getOutputPort(), *(it++)); - connect(limit3->getOutputPort(), *(it++)); - - connect(merge->getOutputPort(), limit_fin->getInputPort()); - connect(limit_fin->getOutputPort(), sink->getPort()); - - std::vector processors = {source1, source2, source3, - transform1, transform2, transform3, - limit1, limit2, limit3, - merge, limit_fin, sink}; -// WriteBufferFromOStream out(std::cout); -// printPipeline(processors, out); - - PipelineExecutor executor(processors); - executor.execute(num_threads); - }; - - auto even_time_single = Measure<>::execution(execute_chain, "Even distribution single thread", 0, 1, 2, 1); - auto even_time_mt = Measure<>::execution(execute_chain, "Even distribution multiple threads", 0, 1, 2, 4); - - auto half_time_single = Measure<>::execution(execute_chain, "Half distribution single thread", 0, 31, 62, 1); - auto half_time_mt = Measure<>::execution(execute_chain, "Half distribution multiple threads", 0, 31, 62, 4); - - auto ordered_time_single = Measure<>::execution(execute_chain, "Ordered distribution single thread", 0, 61, 122, 1); - auto ordered_time_mt = Measure<>::execution(execute_chain, "Ordered distribution multiple threads", 0, 61, 122, 4); - - std::cout << "Single Thread [0:60:3] [1:60:3] [2:60:3] time: " << even_time_single << " ms.\n"; - std::cout << "Multiple Threads [0:60:3] [1:60:3] [2:60:3] time:" << even_time_mt << " ms.\n"; - - std::cout << "Single Thread [0:60:3] [31:90:3] [62:120:3] time: " << half_time_single << " ms.\n"; - std::cout << "Multiple Threads [0:60:3] [31:90:3] [62:120:3] time: " << half_time_mt << " ms.\n"; - - std::cout << "Single Thread [0:60:3] [61:120:3] 
[122:180:3] time: " << ordered_time_single << " ms.\n"; - std::cout << "Multiple Threads [0:60:3] [61:120:3] [122:180:3] time: " << ordered_time_mt << " ms.\n"; - - return 0; -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/src/Processors/tests/processors_test_merge_sorting_transform.cpp b/src/Processors/tests/processors_test_merge_sorting_transform.cpp deleted file mode 100644 index 5e6720f0167..00000000000 --- a/src/Processors/tests/processors_test_merge_sorting_transform.cpp +++ /dev/null @@ -1,250 +0,0 @@ -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -class NumbersSource : public ISource -{ -public: - String getName() const override { return "Numbers"; } - - NumbersSource(UInt64 count_, UInt64 block_size_, unsigned sleep_useconds_) - : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - count(count_), block_size(block_size_), sleep_useconds(sleep_useconds_) - { - } - -private: - UInt64 current_number = 0; - UInt64 count; - UInt64 block_size; - unsigned sleep_useconds; - - Chunk generate() override - { - if (current_number == count) - return {}; - - usleep(sleep_useconds); - - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create()); - - UInt64 number = current_number++; - for (UInt64 i = 0; i < block_size; ++i, number += count) - columns.back()->insert(Field(number)); - - return Chunk(std::move(columns), block_size); - } -}; - -class CheckSortedSink : public ISink -{ -public: - String getName() const override { return "Print"; } - - CheckSortedSink() - : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})) - { - } - -private: - FormatSettings settings; - UInt64 current_number = 0; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - - UInt64 prev = current_number; - const auto & col = chunk.getColumns().at(0); - for (size_t row_num = 0; row_num < rows; ++row_num) - { - UInt64 val = col->getUInt(row_num); - if (val != current_number) - throw Exception("Invalid value. Expected " + toString(current_number) + ", got " + toString(val), - ErrorCodes::LOGICAL_ERROR); - - ++current_number; - } - - std::cout << "CheckSortedSink: " << prev << " - " << current_number << std::endl; - } -}; - -template -struct Measure -{ - template - static typename TimeT::rep execution(F&& func, Args&&... 
args) - { - auto start = std::chrono::steady_clock::now(); - std::forward(func)(std::forward(args)...); - auto duration = std::chrono::duration_cast< TimeT> - (std::chrono::steady_clock::now() - start); - return duration.count(); - } -}; - -} - - -using namespace DB; - -int main(int, char **) -try -{ - Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); - Poco::Logger::root().setChannel(channel); - Poco::Logger::root().setLevel("trace"); - - auto disk = std::make_shared("tmp", ".", 0); - auto tmp_volume = std::make_shared("tmp", std::vector{disk}, 0); - - auto execute_chain = [tmp_volume]( - String msg, - UInt64 source_block_size, - UInt64 blocks_count, - size_t max_merged_block_size, - UInt64 limit, - size_t max_bytes_before_remerge, - size_t max_bytes_before_external_sort, - size_t num_threads) - { - std::cerr << "------------------------\n"; - std::cerr << msg << "\n"; - - auto source = std::make_shared(blocks_count, source_block_size, 100); - SortDescription description = {{0, 1, 1}}; - auto transform = std::make_shared( - source->getPort().getHeader(), description, - max_merged_block_size, limit, - max_bytes_before_remerge, max_bytes_before_external_sort, - tmp_volume, 0); - auto sink = std::make_shared(); - - connect(source->getPort(), transform->getInputs().front()); - connect(transform->getOutputs().front(), sink->getPort()); - - std::vector processors = {source, transform, sink}; - PipelineExecutor executor(processors); - executor.execute(num_threads); - - WriteBufferFromOStream out(std::cout); - printPipeline(executor.getProcessors(), out); - }; - - std::map times; - - for (size_t num_threads : {1, 4}) - { - { - UInt64 source_block_size = 100; - UInt64 blocks_count = 10; - size_t max_merged_block_size = 100; - UInt64 limit = 0; - size_t max_bytes_before_remerge = 10000000; - size_t max_bytes_before_external_sort = 10000000; - std::string msg = num_threads > 1 ? "multiple threads" : "single thread"; - msg += ", " + toString(blocks_count) + " blocks per " + toString(source_block_size) + " numbers" + - ", no remerge and external sorts."; - - Int64 time = Measure<>::execution(execute_chain, msg, - source_block_size, - blocks_count, - max_merged_block_size, - limit, - max_bytes_before_remerge, - max_bytes_before_external_sort, - num_threads); - - times[msg] = time; - } - - { - UInt64 source_block_size = 1024; - UInt64 blocks_count = 10; - size_t max_merged_block_size = 1024; - UInt64 limit = 2048; - size_t max_bytes_before_remerge = sizeof(UInt64) * source_block_size * 4; - size_t max_bytes_before_external_sort = 10000000; - std::string msg = num_threads > 1 ? "multiple threads" : "single thread"; - msg += ", " + toString(blocks_count) + " blocks per " + toString(source_block_size) + " numbers" + - ", with remerge, no external sorts."; - - Int64 time = Measure<>::execution(execute_chain, msg, - source_block_size, - blocks_count, - max_merged_block_size, - limit, - max_bytes_before_remerge, - max_bytes_before_external_sort, - num_threads); - - times[msg] = time; - } - - { - UInt64 source_block_size = 1024; - UInt64 blocks_count = 10; - size_t max_merged_block_size = 1024; - UInt64 limit = 0; - size_t max_bytes_before_remerge = 0; - size_t max_bytes_before_external_sort = sizeof(UInt64) * source_block_size * 4; - std::string msg = num_threads > 1 ? 
"multiple threads" : "single thread"; - msg += ", " + toString(blocks_count) + " blocks per " + toString(source_block_size) + " numbers" + - ", no remerge, with external sorts."; - - Int64 time = Measure<>::execution(execute_chain, msg, - source_block_size, - blocks_count, - max_merged_block_size, - limit, - max_bytes_before_remerge, - max_bytes_before_external_sort, - num_threads); - - times[msg] = time; - } - } - - for (auto & item : times) - std::cout << item.first << ' ' << item.second << " ms.\n"; - - return 0; -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/src/Processors/tests/processors_test_merging_sorted_transform.cpp b/src/Processors/tests/processors_test_merging_sorted_transform.cpp deleted file mode 100644 index 477626d165d..00000000000 --- a/src/Processors/tests/processors_test_merging_sorted_transform.cpp +++ /dev/null @@ -1,207 +0,0 @@ -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include - - -using namespace DB; - - -class NumbersSource : public ISource -{ -public: - String getName() const override { return "Numbers"; } - - NumbersSource(UInt64 start_number, UInt64 step_, UInt64 block_size_, unsigned sleep_useconds_) - : ISource(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - current_number(start_number), step(step_), block_size(block_size_), sleep_useconds(sleep_useconds_) - { - } - -private: - UInt64 current_number = 0; - UInt64 step; - UInt64 block_size; - unsigned sleep_useconds; - - Chunk generate() override - { - usleep(sleep_useconds); - - MutableColumns columns; - columns.emplace_back(ColumnUInt64::create()); - - for (UInt64 i = 0; i < block_size; ++i, current_number += step) - columns.back()->insert(Field(current_number)); - - return Chunk(std::move(columns), block_size); - } -}; - - -class SleepyTransform : public ISimpleTransform -{ -public: - explicit SleepyTransform(unsigned sleep_useconds_) - : ISimpleTransform( - Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }}), - Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }}), - false) - , sleep_useconds(sleep_useconds_) {} - - String getName() const override { return "SleepyTransform"; } - -protected: - void transform(Chunk &) override - { - usleep(sleep_useconds); - } - -private: - unsigned sleep_useconds; -}; - -class PrintSink : public ISink -{ -public: - String getName() const override { return "Print"; } - - explicit PrintSink(String prefix_) - : ISink(Block({ColumnWithTypeAndName{ ColumnUInt64::create(), std::make_shared(), "number" }})), - prefix(std::move(prefix_)) - { - } - -private: - String prefix; - WriteBufferFromFileDescriptor out{STDOUT_FILENO}; - FormatSettings settings; - - void consume(Chunk chunk) override - { - size_t rows = chunk.getNumRows(); - size_t columns = chunk.getNumColumns(); - - for (size_t row_num = 0; row_num < rows; ++row_num) - { - writeString(prefix, out); - for (size_t column_num = 0; column_num < columns; ++column_num) - { - if (column_num != 0) - writeChar('\t', out); - getPort().getHeader().getByPosition(column_num).type->serializeAsText(*chunk.getColumns()[column_num], row_num, out, settings); - } - writeChar('\n', out); - } - - out.next(); - } -}; - -template -struct Measure -{ - template - static typename TimeT::rep execution(F&& func, Args&&... 
args) - { - auto start = std::chrono::steady_clock::now(); - std::forward(func)(std::forward(args)...); - auto duration = std::chrono::duration_cast< TimeT> - (std::chrono::steady_clock::now() - start); - return duration.count(); - } -}; - -int main(int, char **) -try -{ - auto execute_chain = [](String msg, size_t start1, size_t start2, size_t start3, size_t num_threads) - { - std::cerr << msg << "\n"; - - auto source1 = std::make_shared(start1, 3, 2, 100000); - auto source2 = std::make_shared(start2, 3, 2, 100000); - auto source3 = std::make_shared(start3, 3, 2, 100000); - - auto transform1 = std::make_shared(100000); - auto transform2 = std::make_shared(100000); - auto transform3 = std::make_shared(100000); - - auto limit1 = std::make_shared(source1->getPort().getHeader(), 20, 0); - auto limit2 = std::make_shared(source2->getPort().getHeader(), 20, 0); - auto limit3 = std::make_shared(source3->getPort().getHeader(), 20, 0); - - SortDescription description = {{0, 1, 1}}; - auto merge = std::make_shared(source1->getPort().getHeader(), 3, description, 2); - auto limit_fin = std::make_shared(source1->getPort().getHeader(), 54, 0); - auto sink = std::make_shared(""); - - connect(source1->getPort(), transform1->getInputPort()); - connect(source2->getPort(), transform2->getInputPort()); - connect(source3->getPort(), transform3->getInputPort()); - - connect(transform1->getOutputPort(), limit1->getInputPort()); - connect(transform2->getOutputPort(), limit2->getInputPort()); - connect(transform3->getOutputPort(), limit3->getInputPort()); - - auto it = merge->getInputs().begin(); - connect(limit1->getOutputPort(), *(it++)); - connect(limit2->getOutputPort(), *(it++)); - connect(limit3->getOutputPort(), *(it++)); - - connect(merge->getOutputs().front(), limit_fin->getInputPort()); - connect(limit_fin->getOutputPort(), sink->getPort()); - - std::vector processors = {source1, source2, source3, - transform1, transform2, transform3, - limit1, limit2, limit3, - merge, limit_fin, sink}; -// WriteBufferFromOStream out(std::cout); -// printPipeline(processors, out); - - PipelineExecutor executor(processors); - executor.execute(num_threads); - }; - - auto even_time_single = Measure<>::execution(execute_chain, "Even distribution single thread", 0, 1, 2, 1); - auto even_time_mt = Measure<>::execution(execute_chain, "Even distribution multiple threads", 0, 1, 2, 4); - - auto half_time_single = Measure<>::execution(execute_chain, "Half distribution single thread", 0, 31, 62, 1); - auto half_time_mt = Measure<>::execution(execute_chain, "Half distribution multiple threads", 0, 31, 62, 4); - - auto ordered_time_single = Measure<>::execution(execute_chain, "Ordered distribution single thread", 0, 61, 122, 1); - auto ordered_time_mt = Measure<>::execution(execute_chain, "Ordered distribution multiple threads", 0, 61, 122, 4); - - std::cout << "Single Thread [0:60:3] [1:60:3] [2:60:3] time: " << even_time_single << " ms.\n"; - std::cout << "Multiple Threads [0:60:3] [1:60:3] [2:60:3] time:" << even_time_mt << " ms.\n"; - - std::cout << "Single Thread [0:60:3] [31:90:3] [62:120:3] time: " << half_time_single << " ms.\n"; - std::cout << "Multiple Threads [0:60:3] [31:90:3] [62:120:3] time: " << half_time_mt << " ms.\n"; - - std::cout << "Single Thread [0:60:3] [61:120:3] [122:180:3] time: " << ordered_time_single << " ms.\n"; - std::cout << "Multiple Threads [0:60:3] [61:120:3] [122:180:3] time: " << ordered_time_mt << " ms.\n"; - - return 0; -} -catch (...) 
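
As an editorial aside on the deleted test programs above: each of them times its pipeline with a small Measure helper whose template parameter lists were stripped when this diff was rendered. The sketch below shows what such a chrono-based helper generally looks like; it is illustrative only, not part of the patch, and the exact original declarations may differ.

    #include <chrono>
    #include <cstdio>
    #include <utility>

    // Illustrative sketch of a generic timing helper in the shape of the Measure
    // struct used by the deleted tests above (template parameters reconstructed).
    template <typename TimeT = std::chrono::milliseconds>
    struct Measure
    {
        template <typename F, typename... Args>
        static typename TimeT::rep execution(F && func, Args &&... args)
        {
            auto start = std::chrono::steady_clock::now();
            std::forward<F>(func)(std::forward<Args>(args)...);   // run the workload
            auto duration = std::chrono::duration_cast<TimeT>(std::chrono::steady_clock::now() - start);
            return duration.count();                               // elapsed time in TimeT units
        }
    };

    int main()
    {
        auto elapsed = Measure<>::execution(
            [](int n) { volatile long sum = 0; for (int i = 0; i < n; ++i) sum += i; },
            10000000);
        std::printf("%lld ms\n", static_cast<long long>(elapsed));
    }
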
-{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 62320f1c147..ccc48047763 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -106,9 +106,11 @@ SRCS( Port.cpp QueryPipeline.cpp ResizeProcessor.cpp + Sources/DelayedSource.cpp Sources/SinkToOutputStream.cpp Sources/SourceFromInputStream.cpp Sources/SourceWithProgress.cpp + Sources/RemoteSource.cpp Transforms/AddingMissedTransform.cpp Transforms/AddingSelectorTransform.cpp Transforms/AggregatingTransform.cpp @@ -134,6 +136,7 @@ SRCS( Transforms/RollupTransform.cpp Transforms/SortingTransform.cpp Transforms/TotalsHavingTransform.cpp + Transforms/AggregatingInOrderTransform.cpp ) END() diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 6e9275540e5..7e17604c4c7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -304,17 +304,17 @@ void TCPHandler::runImpl() * We will try to send exception to the client in any case - see below. */ state.io.onException(); - exception.emplace(Exception::CreateFromPoco, e); + exception.emplace(Exception::CreateFromPocoTag{}, e); } catch (const Poco::Exception & e) { state.io.onException(); - exception.emplace(Exception::CreateFromPoco, e); + exception.emplace(Exception::CreateFromPocoTag{}, e); } catch (const std::exception & e) { state.io.onException(); - exception.emplace(Exception::CreateFromSTD, e); + exception.emplace(Exception::CreateFromSTDTag{}, e); } catch (...) { diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index a5db9636a5d..a491cc411b1 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -108,11 +110,19 @@ StorageDistributedDirectoryMonitor::~StorageDistributedDirectoryMonitor() void StorageDistributedDirectoryMonitor::flushAllData() { - if (!quit) + if (quit) + return; + + CurrentMetrics::Increment metric_pending_files{CurrentMetrics::DistributedFilesToInsert, 0}; + std::unique_lock lock{mutex}; + + const auto & files = getFiles(metric_pending_files); + if (!files.empty()) { - CurrentMetrics::Increment metric_pending_files{CurrentMetrics::DistributedFilesToInsert, 0}; - std::unique_lock lock{mutex}; - processFiles(metric_pending_files); + processFiles(files, metric_pending_files); + + /// Update counters + getFiles(metric_pending_files); } } @@ -139,20 +149,31 @@ void StorageDistributedDirectoryMonitor::run() while (!quit) { do_sleep = true; + + const auto & files = getFiles(metric_pending_files); + if (files.empty()) + break; + if (!monitor_blocker.isCancelled()) { try { - do_sleep = !processFiles(metric_pending_files); + do_sleep = !processFiles(files, metric_pending_files); + + std::unique_lock metrics_lock(metrics_mutex); + last_exception = std::exception_ptr{}; } catch (...) 
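
An aside on the TCPHandler.cpp hunk above: replacing Exception::CreateFromPoco with Exception::CreateFromPocoTag{} means the constructor overload is now selected by passing a value of an empty tag type rather than an enum constant. The sketch below shows that tag-dispatch idiom with hypothetical names; it is illustrative only and does not reproduce ClickHouse's actual Exception class.

    #include <iostream>
    #include <optional>
    #include <stdexcept>
    #include <string>

    // Empty tag structs pick which constructor overload wraps which source error.
    struct FromStdTag {};
    struct FromMessageTag {};

    class WrappedError
    {
    public:
        WrappedError(FromStdTag, const std::exception & e) : text(e.what()) {}
        WrappedError(FromMessageTag, std::string msg) : text(std::move(msg)) {}
        const std::string & message() const { return text; }
    private:
        std::string text;
    };

    int main()
    {
        std::optional<WrappedError> err;
        try
        {
            throw std::runtime_error("boom");
        }
        catch (const std::exception & e)
        {
            // Passing a tag value such as FromStdTag{} selects the overload explicitly,
            // which is the same pattern as exception.emplace(Exception::CreateFromPocoTag{}, e).
            err.emplace(FromStdTag{}, e);
        }
        std::cout << err->message() << '\n';
    }
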
{ + std::unique_lock metrics_lock(metrics_mutex); + do_sleep = true; ++error_count; sleep_time = std::min( std::chrono::milliseconds{Int64(default_sleep_time.count() * std::exp2(error_count))}, max_sleep_time); tryLogCurrentException(getLoggerName().data()); + last_exception = std::current_exception(); } } else @@ -163,6 +184,8 @@ void StorageDistributedDirectoryMonitor::run() const auto now = std::chrono::system_clock::now(); if (now - last_decrease_time > decrease_error_count_period) { + std::unique_lock metrics_lock(metrics_mutex); + error_count /= 2; last_decrease_time = now; } @@ -171,6 +194,9 @@ void StorageDistributedDirectoryMonitor::run() break; } + /// Update counters + getFiles(metric_pending_files); + if (!quit && do_sleep) task_handle->scheduleAfter(sleep_time.count()); } @@ -226,9 +252,10 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri } -bool StorageDistributedDirectoryMonitor::processFiles(CurrentMetrics::Increment & metric_pending_files) +std::map StorageDistributedDirectoryMonitor::getFiles(CurrentMetrics::Increment & metric_pending_files) { std::map files; + size_t new_bytes_count = 0; Poco::DirectoryIterator end; for (Poco::DirectoryIterator it{path}; it != end; ++it) @@ -237,16 +264,26 @@ bool StorageDistributedDirectoryMonitor::processFiles(CurrentMetrics::Increment Poco::Path file_path{file_path_str}; if (!it->isDirectory() && startsWith(file_path.getExtension(), "bin")) + { files[parse(file_path.getBaseName())] = file_path_str; + new_bytes_count += Poco::File(file_path).getSize(); + } } /// Note: the value of this metric will be kept if this function will throw an exception. /// This is needed, because in case of exception, files still pending. metric_pending_files.changeTo(files.size()); - if (files.empty()) - return false; + { + std::unique_lock metrics_lock(metrics_mutex); + files_count = files.size(); + bytes_count = new_bytes_count; + } + return files; +} +bool StorageDistributedDirectoryMonitor::processFiles(const std::map & files, CurrentMetrics::Increment & metric_pending_files) +{ if (should_batch_inserts) { processFilesWithBatching(files, metric_pending_files); @@ -593,6 +630,20 @@ bool StorageDistributedDirectoryMonitor::scheduleAfter(size_t ms) return task_handle->scheduleAfter(ms, false); } +StorageDistributedDirectoryMonitor::Status StorageDistributedDirectoryMonitor::getStatus() const +{ + std::unique_lock metrics_lock(metrics_mutex); + + return Status{ + path, + last_exception, + error_count, + files_count, + bytes_count, + monitor_blocker.isCancelled(), + }; +} + void StorageDistributedDirectoryMonitor::processFilesWithBatching( const std::map & files, CurrentMetrics::Increment & metric_pending_files) @@ -734,7 +785,10 @@ void StorageDistributedDirectoryMonitor::updatePath(const std::string & new_path task_handle->deactivate(); - path = new_path; + { + std::unique_lock metrics_lock(metrics_mutex); + path = new_path; + } current_batch_file_path = path + "current_batch.txt"; task_handle->activateAndSchedule(); diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 418cd430243..960d82f0716 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include #include @@ -14,6 +14,10 @@ namespace CurrentMetrics { class Increment; } namespace DB { +class StorageDistributed; +class ActionBlocker; +class BackgroundSchedulePool; + /** Details of StorageDistributed. 
* This type is not designed for standalone use. */ @@ -37,9 +41,24 @@ public: /// For scheduling via DistributedBlockOutputStream bool scheduleAfter(size_t ms); + + /// system.distribution_queue interface + struct Status + { + std::string path; + std::exception_ptr last_exception; + size_t error_count; + size_t files_count; + size_t bytes_count; + bool is_blocked; + }; + Status getStatus() const; + private: void run(); - bool processFiles(CurrentMetrics::Increment & metric_pending_files); + + std::map getFiles(CurrentMetrics::Increment & metric_pending_files); + bool processFiles(const std::map & files, CurrentMetrics::Increment & metric_pending_files); void processFile(const std::string & file_path, CurrentMetrics::Increment & metric_pending_files); void processFilesWithBatching(const std::map & files, CurrentMetrics::Increment & metric_pending_files); @@ -61,7 +80,12 @@ private: struct BatchHeader; struct Batch; - size_t error_count{}; + mutable std::mutex metrics_mutex; + size_t error_count = 0; + size_t files_count = 0; + size_t bytes_count = 0; + std::exception_ptr last_exception; + const std::chrono::milliseconds default_sleep_time; std::chrono::milliseconds sleep_time; const std::chrono::milliseconds max_sleep_time; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 5516e85b143..4e28923ebfc 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -518,7 +518,7 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz } else { - if (shard_info.isLocal()) + if (shard_info.isLocal() && settings.prefer_localhost_replica) writeToLocal(block, shard_info.getLocalNodeCount()); std::vector dir_names; diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 3e4533f8bb2..3edfcc7b9d2 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } KafkaBlockInputStream::KafkaBlockInputStream( - StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_, bool commit_in_suffix_) + StorageKafka & storage_, const std::shared_ptr & context_, const Names & columns, size_t max_block_size_, bool commit_in_suffix_) : storage(storage_) , context(context_) , column_names(columns) @@ -22,12 +22,6 @@ KafkaBlockInputStream::KafkaBlockInputStream( , non_virtual_header(storage.getSampleBlockNonMaterialized()) , virtual_header(storage.getSampleBlockForColumns({"_topic", "_key", "_offset", "_partition", "_timestamp","_timestamp_ms","_headers.name","_headers.value"})) { - context.setSetting("input_format_skip_unknown_fields", 1u); // Always skip unknown fields regardless of the context (JSON or TSKV) - context.setSetting("input_format_allow_errors_ratio", 0.); - context.setSetting("input_format_allow_errors_num", storage.skipBroken()); - - if (!storage.getSchemaName().empty()) - context.setSetting("format_schema", storage.getSchemaName()); } KafkaBlockInputStream::~KafkaBlockInputStream() @@ -48,7 +42,7 @@ Block KafkaBlockInputStream::getHeader() const void KafkaBlockInputStream::readPrefixImpl() { - auto timeout = std::chrono::milliseconds(context.getSettingsRef().kafka_max_wait_ms.totalMilliseconds()); + auto timeout = std::chrono::milliseconds(context->getSettingsRef().kafka_max_wait_ms.totalMilliseconds()); buffer 
= storage.popReadBuffer(timeout); if (!buffer) @@ -73,7 +67,7 @@ Block KafkaBlockInputStream::readImpl() MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); auto input_format = FormatFactory::instance().getInputFormat( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size); + storage.getFormatName(), *buffer, non_virtual_header, *context, max_block_size); InputPort port(input_format->getPort().getHeader(), input_format.get()); connect(input_format->getPort(), port); diff --git a/src/Storages/Kafka/KafkaBlockInputStream.h b/src/Storages/Kafka/KafkaBlockInputStream.h index e3052122894..387f5088721 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/src/Storages/Kafka/KafkaBlockInputStream.h @@ -14,7 +14,7 @@ class KafkaBlockInputStream : public IBlockInputStream { public: KafkaBlockInputStream( - StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_, bool commit_in_suffix = true); + StorageKafka & storage_, const std::shared_ptr & context_, const Names & columns, size_t max_block_size_, bool commit_in_suffix = true); ~KafkaBlockInputStream() override; String getName() const override { return storage.getName(); } @@ -29,7 +29,7 @@ public: private: StorageKafka & storage; - Context context; + const std::shared_ptr context; Names column_names; UInt64 max_block_size; diff --git a/src/Storages/Kafka/KafkaBlockOutputStream.cpp b/src/Storages/Kafka/KafkaBlockOutputStream.cpp index fe8aa207c93..17ef5aa104c 100644 --- a/src/Storages/Kafka/KafkaBlockOutputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockOutputStream.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes extern const int CANNOT_CREATE_IO_BUFFER; } -KafkaBlockOutputStream::KafkaBlockOutputStream(StorageKafka & storage_, const Context & context_) : storage(storage_), context(context_) +KafkaBlockOutputStream::KafkaBlockOutputStream(StorageKafka & storage_, const std::shared_ptr & context_) : storage(storage_), context(context_) { } @@ -26,7 +26,7 @@ void KafkaBlockOutputStream::writePrefix() if (!buffer) throw Exception("Failed to create Kafka producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER); - child = FormatFactory::instance().getOutput(storage.getFormatName(), *buffer, getHeader(), context, [this](const Columns & columns, size_t row){ buffer->countRow(columns, row); }); + child = FormatFactory::instance().getOutput(storage.getFormatName(), *buffer, getHeader(), *context, [this](const Columns & columns, size_t row){ buffer->countRow(columns, row); }); } void KafkaBlockOutputStream::write(const Block & block) diff --git a/src/Storages/Kafka/KafkaBlockOutputStream.h b/src/Storages/Kafka/KafkaBlockOutputStream.h index f3eb3dae0ba..7a973724f1b 100644 --- a/src/Storages/Kafka/KafkaBlockOutputStream.h +++ b/src/Storages/Kafka/KafkaBlockOutputStream.h @@ -10,7 +10,7 @@ namespace DB class KafkaBlockOutputStream : public IBlockOutputStream { public: - explicit KafkaBlockOutputStream(StorageKafka & storage_, const Context & context_); + explicit KafkaBlockOutputStream(StorageKafka & storage_, const std::shared_ptr & context_); Block getHeader() const override; @@ -22,7 +22,7 @@ public: private: StorageKafka & storage; - Context context; + const std::shared_ptr context; ProducerBufferPtr buffer; BlockOutputStreamPtr child; }; diff --git a/src/Storages/Kafka/KafkaSettings.h b/src/Storages/Kafka/KafkaSettings.h index 43984f81e05..e65522b3606 100644 --- a/src/Storages/Kafka/KafkaSettings.h +++ b/src/Storages/Kafka/KafkaSettings.h @@ -1,7 +1,7 @@ #pragma once #include - 
+#include namespace DB { @@ -15,18 +15,34 @@ struct KafkaSettings : public SettingsCollection { -#define LIST_OF_KAFKA_SETTINGS(M) \ +#define KAFKA_RELATED_SETTINGS(M) \ M(SettingString, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.", 0) \ M(SettingString, kafka_topic_list, "", "A list of Kafka topics.", 0) \ - M(SettingString, kafka_group_name, "", "A group of Kafka consumers.", 0) \ - M(SettingString, kafka_client_id, "", "A client id of Kafka consumer.", 0) \ + M(SettingString, kafka_group_name, "", "Client group id string. All Kafka consumers sharing the same group.id belong to the same group.", 0) \ + M(SettingString, kafka_client_id, "", "Client identifier.", 0) \ + M(SettingUInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.", 0) \ + M(SettingBool, kafka_commit_every_batch, false, "Commit every consumed and handled batch instead of a single commit after writing a whole block", 0) \ + /* default is stream_poll_timeout_ms */ \ + M(SettingMilliseconds, kafka_poll_timeout_ms, 0, "Timeout for single poll from Kafka.", 0) \ + /* default is min(max_block_size, kafka_max_block_size)*/ \ + M(SettingUInt64, kafka_poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single Kafka poll.", 0) \ + /* default is = min_insert_block_size / kafka_num_consumers */ \ + M(SettingUInt64, kafka_max_block_size, 0, "Number of row collected by poll(s) for flushing data from Kafka.", 0) \ + /* default is stream_flush_interval_ms */ \ + M(SettingMilliseconds, kafka_flush_interval_ms, 0, "Timeout for flushing data from Kafka.", 0) \ + /* those are mapped to format factory settings */ \ M(SettingString, kafka_format, "", "The message format for Kafka engine.", 0) \ M(SettingChar, kafka_row_delimiter, '\0', "The character to be considered as a delimiter in Kafka message.", 0) \ M(SettingString, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine", 0) \ - M(SettingUInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.", 0) \ - M(SettingUInt64, kafka_max_block_size, 0, "The maximum batch size for poll.", 0) \ - M(SettingUInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block", 0) \ - M(SettingUInt64, kafka_commit_every_batch, 0, "Commit every consumed and handled batch instead of a single commit after writing a whole block", 0) + M(SettingUInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block", 0) + + /** TODO: */ + /* https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md */ + /* https://github.com/edenhill/librdkafka/blob/v1.4.2/src/rdkafka_conf.c */ + +#define LIST_OF_KAFKA_SETTINGS(M) \ + KAFKA_RELATED_SETTINGS(M) \ + FORMAT_FACTORY_SETTINGS(M) DECLARE_SETTINGS_COLLECTION(LIST_OF_KAFKA_SETTINGS) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index fc83fd84884..bb721417c5b 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -119,39 +119,74 @@ StorageKafka::StorageKafka( const StorageID & table_id_, Context & context_, const ColumnsDescription & columns_, - const String & brokers_, - const String & group_, - const String & client_id_, - const Names & topics_, - const String & format_name_, - char row_delimiter_, - const String & schema_name_, - size_t num_consumers_, - UInt64 max_block_size_, - size_t skip_broken_, - bool intermediate_commit_) + std::unique_ptr 
kafka_settings_) : IStorage(table_id_) , global_context(context_.getGlobalContext()) - , kafka_context(Context(global_context)) - , topics(global_context.getMacros()->expand(topics_)) - , brokers(global_context.getMacros()->expand(brokers_)) - , group(global_context.getMacros()->expand(group_)) - , client_id(client_id_.empty() ? getDefaultClientId(table_id_) : global_context.getMacros()->expand(client_id_)) - , format_name(global_context.getMacros()->expand(format_name_)) - , row_delimiter(row_delimiter_) - , schema_name(global_context.getMacros()->expand(schema_name_)) - , num_consumers(num_consumers_) - , max_block_size(max_block_size_) + , kafka_context(std::make_shared(global_context)) + , kafka_settings(std::move(kafka_settings_)) + , topics(parseTopics(global_context.getMacros()->expand(kafka_settings->kafka_topic_list.value))) + , brokers(global_context.getMacros()->expand(kafka_settings->kafka_broker_list.value)) + , group(global_context.getMacros()->expand(kafka_settings->kafka_group_name.value)) + , client_id(kafka_settings->kafka_client_id.value.empty() ? getDefaultClientId(table_id_) : global_context.getMacros()->expand(kafka_settings->kafka_client_id.value)) + , format_name(global_context.getMacros()->expand(kafka_settings->kafka_format.value)) + , row_delimiter(kafka_settings->kafka_row_delimiter.value) + , schema_name(global_context.getMacros()->expand(kafka_settings->kafka_schema.value)) + , num_consumers(kafka_settings->kafka_num_consumers.value) , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) - , semaphore(0, num_consumers_) - , skip_broken(skip_broken_) - , intermediate_commit(intermediate_commit_) + , semaphore(0, num_consumers) + , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) + , settings_adjustments(createSettingsAdjustments()) { - kafka_context.makeQueryContext(); - setColumns(columns_); task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); + + kafka_context->makeQueryContext(); + kafka_context->applySettingsChanges(settings_adjustments); +} + +SettingsChanges StorageKafka::createSettingsAdjustments() +{ + SettingsChanges result; + // Needed for backward compatibility + if (!kafka_settings->input_format_skip_unknown_fields.changed) + { + // Always skip unknown fields regardless of the context (JSON or TSKV) + kafka_settings->input_format_skip_unknown_fields = true; + } + + if (!kafka_settings->input_format_allow_errors_ratio.changed) + { + kafka_settings->input_format_allow_errors_ratio = 0.; + } + + if (!kafka_settings->input_format_allow_errors_num.changed) + { + kafka_settings->input_format_allow_errors_num = kafka_settings->kafka_skip_broken_messages.value; + } + + if (!schema_name.empty()) + result.emplace_back("format_schema", schema_name); + + for (auto & it : *kafka_settings) + { + if (it.isChanged() && it.getName().toString().rfind("kafka_",0) == std::string::npos) + { + result.emplace_back(it.getName().toString(), it.getValueAsString()); + } + } + return result; +} + +Names StorageKafka::parseTopics(String topic_list) +{ + Names result; + boost::split(result,topic_list,[](char c){ return c == ','; }); + for (String & topic : result) + { + boost::trim(topic); + } + return result; } String StorageKafka::getDefaultClientId(const StorageID & table_id_) @@ -176,6 +211,8 @@ Pipes StorageKafka::read( /// Always use all consumers at once, otherwise SELECT may not read messages from all partitions. 
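
An aside on StorageKafka::parseTopics above: the kafka_topic_list value is split on commas with boost::split and every entry is trimmed with boost::trim (the #include targets were lost in this rendering; both calls come from boost/algorithm/string.hpp). A standalone sketch of the same parsing, outside ClickHouse and not part of the patch:

    #include <boost/algorithm/string.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    // Split a comma-separated topic list and trim whitespace around each entry,
    // mirroring the parseTopics helper added in the hunk above.
    std::vector<std::string> parseTopics(std::string topic_list)
    {
        std::vector<std::string> result;
        boost::split(result, topic_list, [](char c) { return c == ','; });
        for (auto & topic : result)
            boost::trim(topic);
        return result;
    }

    int main()
    {
        // Prints clicks, views and errors, one per line, without the stray spaces.
        for (const auto & topic : parseTopics(" clicks , views ,errors"))
            std::cout << topic << '\n';
    }
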
Pipes pipes; pipes.reserve(num_created_consumers); + auto modified_context = std::make_shared(context); + modified_context->applySettingsChanges(settings_adjustments); // Claim as many consumers as requested, but don't block for (size_t i = 0; i < num_created_consumers; ++i) @@ -184,7 +221,7 @@ Pipes StorageKafka::read( /// TODO: probably that leads to awful performance. /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages. /// TODO: rewrite KafkaBlockInputStream to KafkaSource. Now it is used in other place. - pipes.emplace_back(std::make_shared(std::make_shared(*this, context, column_names, 1))); + pipes.emplace_back(std::make_shared(std::make_shared(*this, modified_context, column_names, 1))); } LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); @@ -194,9 +231,12 @@ Pipes StorageKafka::read( BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const Context & context) { + auto modified_context = std::make_shared(context); + modified_context->applySettingsChanges(settings_adjustments); + if (topics.size() > 1) throw Exception("Can't write to Kafka table with multiple topics!", ErrorCodes::NOT_IMPLEMENTED); - return std::make_shared(*this, context); + return std::make_shared(*this, modified_context); } @@ -268,13 +308,14 @@ ConsumerBufferPtr StorageKafka::popReadBuffer(std::chrono::milliseconds timeout) return buffer; } - ProducerBufferPtr StorageKafka::createWriteBuffer(const Block & header) { cppkafka::Configuration conf; conf.set("metadata.broker.list", brokers); conf.set("group.id", group); conf.set("client.id", client_id); + conf.set("client.software.name", VERSION_NAME); + conf.set("client.software.version", VERSION_DESCRIBE); // TODO: fill required settings updateConfiguration(conf); @@ -303,9 +344,16 @@ ConsumerBufferPtr StorageKafka::createReadBuffer(const size_t consumer_number) { conf.set("client.id", client_id); } - + conf.set("client.software.name", VERSION_NAME); + conf.set("client.software.version", VERSION_DESCRIBE); conf.set("auto.offset.reset", "smallest"); // If no offset stored for this group, read all messages from the start + // that allows to prevent fast draining of the librdkafka queue + // during building of single insert block. Improves performance + // significantly, but may lead to bigger memory consumption. + size_t default_queued_min_messages = 100000; // we don't want to decrease the default + conf.set("queued.min.messages", std::max(getMaxBlockSize(),default_queued_min_messages)); + updateConfiguration(conf); // those settings should not be changed by users. @@ -317,17 +365,32 @@ ConsumerBufferPtr StorageKafka::createReadBuffer(const size_t consumer_number) auto consumer = std::make_shared(conf); consumer->set_destroy_flags(RD_KAFKA_DESTROY_F_NO_CONSUMER_CLOSE); - // Limit the number of batched messages to allow early cancellations - const Settings & settings = global_context.getSettingsRef(); - size_t batch_size = max_block_size; - if (!batch_size) - batch_size = settings.max_block_size.value; - size_t poll_timeout = settings.stream_poll_timeout_ms.totalMilliseconds(); - /// NOTE: we pass |stream_cancelled| by reference here, so the buffers should not outlive the storage. 
- return std::make_shared(consumer, log, batch_size, poll_timeout, intermediate_commit, stream_cancelled, getTopics()); + return std::make_shared(consumer, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, stream_cancelled, topics); } +size_t StorageKafka::getMaxBlockSize() const +{ + return kafka_settings->kafka_max_block_size.changed + ? kafka_settings->kafka_max_block_size.value + : (global_context.getSettingsRef().max_insert_block_size.value / num_consumers); +} + +size_t StorageKafka::getPollMaxBatchSize() const +{ + size_t batch_size = kafka_settings->kafka_poll_max_batch_size.changed + ? kafka_settings->kafka_poll_max_batch_size.value + : global_context.getSettingsRef().max_block_size.value; + + return std::min(batch_size,getMaxBlockSize()); +} + +size_t StorageKafka::getPollTimeoutMillisecond() const +{ + return kafka_settings->kafka_poll_timeout_ms.changed + ? kafka_settings->kafka_poll_timeout_ms.totalMilliseconds() + : global_context.getSettingsRef().stream_poll_timeout_ms.totalMilliseconds(); +} void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) { @@ -458,19 +521,17 @@ bool StorageKafka::streamToViews() auto insert = std::make_shared(); insert->table_id = table_id; - const Settings & settings = global_context.getSettingsRef(); - size_t block_size = max_block_size; - if (block_size == 0) - block_size = settings.max_block_size; + size_t block_size = getMaxBlockSize(); // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); + InterpreterInsertQuery interpreter(insert, *kafka_context, false, true, true); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream BlockInputStreams streams; streams.reserve(num_created_consumers); + for (size_t i = 0; i < num_created_consumers; ++i) { auto stream @@ -479,7 +540,11 @@ bool StorageKafka::streamToViews() // Limit read batch to maximum block size to allow DDL IBlockInputStream::LocalLimits limits; - limits.speed_limits.max_execution_time = settings.stream_flush_interval_ms; + + limits.speed_limits.max_execution_time = kafka_settings->kafka_flush_interval_ms.changed + ? 
kafka_settings->kafka_flush_interval_ms + : global_context.getSettingsRef().stream_flush_interval_ms; + limits.timeout_overflow_mode = OverflowMode::BREAK; stream->setLimits(limits); } @@ -514,17 +579,61 @@ void registerStorageKafka(StorageFactory & factory) size_t args_count = engine_args.size(); bool has_settings = args.storage_def->settings; - KafkaSettings kafka_settings; + auto kafka_settings = std::make_unique(); if (has_settings) { - kafka_settings.loadFromQuery(*args.storage_def); + kafka_settings->loadFromQuery(*args.storage_def); } + // Check arguments and settings + #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME, EVAL) \ + /* One of the four required arguments is not specified */ \ + if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && \ + !kafka_settings->PAR_NAME.changed) \ + { \ + throw Exception( \ + "Required parameter '" #PAR_NAME "' " \ + "for storage Kafka not specified", \ + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); \ + } \ + if (args_count >= (ARG_NUM)) \ + { \ + /* The same argument is given in two places */ \ + if (has_settings && \ + kafka_settings->PAR_NAME.changed) \ + { \ + throw Exception( \ + "The argument №" #ARG_NUM " of storage Kafka " \ + "and the parameter '" #PAR_NAME "' " \ + "in SETTINGS cannot be specified at the same time", \ + ErrorCodes::BAD_ARGUMENTS); \ + } \ + /* move engine args to settings */ \ + else \ + { \ + if ((EVAL) == 1) \ + { \ + engine_args[(ARG_NUM)-1] = \ + evaluateConstantExpressionAsLiteral( \ + engine_args[(ARG_NUM)-1], \ + args.local_context); \ + } \ + if ((EVAL) == 2) \ + { \ + engine_args[(ARG_NUM)-1] = \ + evaluateConstantExpressionOrIdentifierAsLiteral( \ + engine_args[(ARG_NUM)-1], \ + args.local_context); \ + } \ + kafka_settings->PAR_NAME.set( \ + engine_args[(ARG_NUM)-1]->as().value);\ + } \ + } + /** Arguments of engine is following: * - Kafka broker list * - List of topics * - Group ID (may be a constaint expression with a string result) - * - Client ID * - Message format (string) * - Row delimiter * - Schema (optional, if the format supports it) @@ -534,209 +643,32 @@ void registerStorageKafka(StorageFactory & factory) * - Do intermediate commits when the batch consumed and handled */ - // Check arguments and settings - #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME) \ - /* One of the four required arguments is not specified */ \ - if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && \ - !kafka_settings.PAR_NAME.changed) \ - { \ - throw Exception( \ - "Required parameter '" #PAR_NAME "' " \ - "for storage Kafka not specified", \ - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); \ - } \ - /* The same argument is given in two places */ \ - if (has_settings && \ - kafka_settings.PAR_NAME.changed && \ - args_count >= (ARG_NUM)) \ - { \ - throw Exception( \ - "The argument №" #ARG_NUM " of storage Kafka " \ - "and the parameter '" #PAR_NAME "' " \ - "in SETTINGS cannot be specified at the same time", \ - ErrorCodes::BAD_ARGUMENTS); \ - } - - CHECK_KAFKA_STORAGE_ARGUMENT(1, kafka_broker_list) - CHECK_KAFKA_STORAGE_ARGUMENT(2, kafka_topic_list) - CHECK_KAFKA_STORAGE_ARGUMENT(3, kafka_group_name) - CHECK_KAFKA_STORAGE_ARGUMENT(4, kafka_format) - CHECK_KAFKA_STORAGE_ARGUMENT(5, kafka_row_delimiter) - CHECK_KAFKA_STORAGE_ARGUMENT(6, kafka_schema) - CHECK_KAFKA_STORAGE_ARGUMENT(7, kafka_num_consumers) - CHECK_KAFKA_STORAGE_ARGUMENT(8, kafka_max_block_size) - CHECK_KAFKA_STORAGE_ARGUMENT(9, kafka_skip_broken_messages) - CHECK_KAFKA_STORAGE_ARGUMENT(10, kafka_commit_every_batch) + /* 0 = raw, 1 = 
evaluateConstantExpressionAsLiteral, 2=evaluateConstantExpressionOrIdentifierAsLiteral */ + CHECK_KAFKA_STORAGE_ARGUMENT(1, kafka_broker_list, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(2, kafka_topic_list, 1) + CHECK_KAFKA_STORAGE_ARGUMENT(3, kafka_group_name, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(4, kafka_format, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(5, kafka_row_delimiter, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(6, kafka_schema, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(7, kafka_num_consumers, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(8, kafka_max_block_size, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(9, kafka_skip_broken_messages, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(10, kafka_commit_every_batch, 0) #undef CHECK_KAFKA_STORAGE_ARGUMENT - // Get and check broker list - String brokers = kafka_settings.kafka_broker_list; - if (args_count >= 1) + auto num_consumers = kafka_settings->kafka_num_consumers.value; + + if (num_consumers > 16) { - const auto * ast = engine_args[0]->as(); - if (ast && ast->value.getType() == Field::Types::String) - { - brokers = safeGet(ast->value); - } - else - { - throw Exception(String("Kafka broker list must be a string"), ErrorCodes::BAD_ARGUMENTS); - } + throw Exception("Number of consumers can not be bigger than 16", ErrorCodes::BAD_ARGUMENTS); + } + else if (num_consumers < 1) + { + throw Exception("Number of consumers can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); } - // Get and check topic list - String topic_list = kafka_settings.kafka_topic_list.value; - if (args_count >= 2) - { - engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.local_context); - topic_list = engine_args[1]->as().value.safeGet(); - } - - Names topics; - boost::split(topics, topic_list , [](char c){ return c == ','; }); - for (String & topic : topics) - { - boost::trim(topic); - } - - // Get and check group name - String group = kafka_settings.kafka_group_name.value; - if (args_count >= 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context); - group = engine_args[2]->as().value.safeGet(); - } - - // Get and check message format name - String format = kafka_settings.kafka_format.value; - if (args_count >= 4) - { - engine_args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[3], args.local_context); - - const auto * ast = engine_args[3]->as(); - if (ast && ast->value.getType() == Field::Types::String) - { - format = safeGet(ast->value); - } - else - { - throw Exception("Format must be a string", ErrorCodes::BAD_ARGUMENTS); - } - } - - // Parse row delimiter (optional) - char row_delimiter = kafka_settings.kafka_row_delimiter; - if (args_count >= 5) - { - engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], args.local_context); - - const auto * ast = engine_args[4]->as(); - String arg; - if (ast && ast->value.getType() == Field::Types::String) - { - arg = safeGet(ast->value); - } - else - { - throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); - } - if (arg.size() > 1) - { - throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); - } - else if (arg.empty()) - { - row_delimiter = '\0'; - } - else - { - row_delimiter = arg[0]; - } - } - - // Parse format schema if supported (optional) - String schema = kafka_settings.kafka_schema.value; - if (args_count >= 6) - { - engine_args[5] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[5], args.local_context); - - const auto * ast = engine_args[5]->as(); - if (ast && ast->value.getType() == Field::Types::String) - { 
- schema = safeGet(ast->value); - } - else - { - throw Exception("Format schema must be a string", ErrorCodes::BAD_ARGUMENTS); - } - } - - // Parse number of consumers (optional) - UInt64 num_consumers = kafka_settings.kafka_num_consumers; - if (args_count >= 7) - { - const auto * ast = engine_args[6]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) - { - num_consumers = safeGet(ast->value); - } - else - { - throw Exception("Number of consumers must be a positive integer", ErrorCodes::BAD_ARGUMENTS); - } - } - - // Parse max block size (optional) - UInt64 max_block_size = static_cast(kafka_settings.kafka_max_block_size); - if (args_count >= 8) - { - const auto * ast = engine_args[7]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) - { - max_block_size = static_cast(safeGet(ast->value)); - } - else - { - // TODO: no check if the integer is really positive - throw Exception("Maximum block size must be a positive integer", ErrorCodes::BAD_ARGUMENTS); - } - } - - size_t skip_broken = static_cast(kafka_settings.kafka_skip_broken_messages); - if (args_count >= 9) - { - const auto * ast = engine_args[8]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) - { - skip_broken = static_cast(safeGet(ast->value)); - } - else - { - throw Exception("Number of broken messages to skip must be a non-negative integer", ErrorCodes::BAD_ARGUMENTS); - } - } - - bool intermediate_commit = static_cast(kafka_settings.kafka_commit_every_batch); - if (args_count >= 10) - { - const auto * ast = engine_args[9]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) - { - intermediate_commit = static_cast(safeGet(ast->value)); - } - else - { - throw Exception("Flag for committing every batch must be 0 or 1", ErrorCodes::BAD_ARGUMENTS); - } - } - - // Get and check client id - String client_id = kafka_settings.kafka_client_id.value; - - return StorageKafka::create( - args.table_id, args.context, args.columns, - brokers, group, client_id, topics, format, row_delimiter, schema, num_consumers, max_block_size, skip_broken, intermediate_commit); + return StorageKafka::create(args.table_id, args.context, args.columns, std::move(kafka_settings)); }; factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 1ea7d6dcad7..be3f89687fe 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -54,10 +55,7 @@ public: ProducerBufferPtr createWriteBuffer(const Block & header); - const auto & getTopics() const { return topics; } const auto & getFormatName() const { return format_name; } - const auto & getSchemaName() const { return schema_name; } - const auto & skipBroken() const { return skip_broken; } NamesAndTypesList getVirtuals() const override; protected: @@ -65,58 +63,53 @@ protected: const StorageID & table_id_, Context & context_, const ColumnsDescription & columns_, - const String & brokers_, - const String & group_, - const String & client_id_, - const Names & topics_, - const String & format_name_, - char row_delimiter_, - const String & schema_name_, - size_t num_consumers_, - UInt64 max_block_size_, - size_t skip_broken, - bool intermediate_commit_); + std::unique_ptr kafka_settings_); private: // Configuration and state - Context global_context; - Context kafka_context; - Names topics; + Context & global_context; + 
std::shared_ptr kafka_context; + std::unique_ptr kafka_settings; + const Names topics; const String brokers; const String group; const String client_id; const String format_name; - char row_delimiter; /// optional row delimiter for generating char delimited stream in order to make various input stream parsers happy. + const char row_delimiter; /// optional row delimiter for generating char delimited stream in order to make various input stream parsers happy. const String schema_name; - size_t num_consumers; /// total number of consumers - UInt64 max_block_size; /// maximum block size for insertion into this table + const size_t num_consumers; /// total number of consumers + Poco::Logger * log; + Poco::Semaphore semaphore; + const bool intermediate_commit; + const SettingsChanges settings_adjustments; /// Can differ from num_consumers in case of exception in startup() (or if startup() hasn't been called). /// In this case we still need to be able to shutdown() properly. size_t num_created_consumers = 0; /// number of actually created consumers. - Poco::Logger * log; - - // Consumer list - Poco::Semaphore semaphore; - std::mutex mutex; std::vector buffers; /// available buffers for Kafka consumers - size_t skip_broken; - - bool intermediate_commit; + std::mutex mutex; // Stream thread BackgroundSchedulePool::TaskHolder task; std::atomic stream_cancelled{false}; + SettingsChanges createSettingsAdjustments(); ConsumerBufferPtr createReadBuffer(const size_t consumer_number); // Update Kafka configuration with values from CH user configuration. - void updateConfiguration(cppkafka::Configuration & conf); + void updateConfiguration(cppkafka::Configuration & conf); void threadFunc(); + + size_t getPollMaxBatchSize() const; + size_t getMaxBlockSize() const; + size_t getPollTimeoutMillisecond() const; + + static Names parseTopics(String topic_list); static String getDefaultClientId(const StorageID & table_id_); + bool streamToViews(); bool checkDependencies(const StorageID & table_id); }; diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index a21a9374137..2a9d7766fd7 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -26,6 +26,11 @@ public: return std::make_shared(table_id, columns, std::move(pipes), to_stage); } std::string getName() const override { return "Blocks"; } + /// It is passed inside the query and solved at its level. + bool supportsPrewhere() const override { return true; } + bool supportsSampling() const override { return true; } + bool supportsFinal() const override { return true; } + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override { return to_stage; } Pipes read( diff --git a/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/src/Storages/MergeTree/BackgroundProcessingPool.cpp index 8f6d7c19549..ec062d3d138 100644 --- a/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -16,30 +16,24 @@ namespace DB { -void BackgroundProcessingPoolTaskInfo::wake() +void BackgroundProcessingPoolTaskInfo::signalReadyToRun() { Poco::Timestamp current_time; - { std::unique_lock lock(pool.tasks_mutex); - /// This will ensure that iterator is valid. Must be done under the same mutex when the iterator is invalidated. + /// This check ensures that the iterator is valid. Must be performed under the same mutex as invalidation. 
if (removed) return; - auto next_time_to_execute = iterator->first; - auto this_task_handle = iterator->second; + /// If this task did nothing the previous time and still should sleep, then reschedule to cancel the sleep. + const auto & scheduled_time = iterator->first; + if (scheduled_time > current_time) + pool.rescheduleTask(iterator, current_time); - /// If this task was done nothing at previous time and it has to sleep, then cancel sleep time. - if (next_time_to_execute > current_time) - next_time_to_execute = current_time; - - pool.tasks.erase(iterator); - iterator = pool.tasks.emplace(next_time_to_execute, this_task_handle); + /// Note that if all threads are currently busy doing their work, this call will not wakeup any thread. + pool.wake_event.notify_one(); } - - /// Note that if all threads are currently do some work, this call will not wakeup any thread. - pool.wake_event.notify_one(); } @@ -56,7 +50,7 @@ BackgroundProcessingPool::BackgroundProcessingPool(int size_, threads.resize(size); for (auto & thread : threads) - thread = ThreadFromGlobalPool([this] { threadFunction(); }); + thread = ThreadFromGlobalPool([this] { workLoopFunc(); }); } @@ -65,16 +59,19 @@ BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::createTask(const return std::make_shared(*this, task); } -void BackgroundProcessingPool::startTask(const TaskHandle & task) +void BackgroundProcessingPool::startTask(const TaskHandle & task, bool allow_execute_in_parallel) { Poco::Timestamp current_time; + task->allow_execute_in_parallel = allow_execute_in_parallel; + { std::unique_lock lock(tasks_mutex); task->iterator = tasks.emplace(current_time, task); + + wake_event.notify_all(); } - wake_event.notify_all(); } BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::addTask(const Task & task) @@ -105,8 +102,12 @@ BackgroundProcessingPool::~BackgroundProcessingPool() { try { - shutdown = true; - wake_event.notify_all(); + { + std::lock_guard lock(tasks_mutex); + shutdown = true; + wake_event.notify_all(); + } + for (auto & thread : threads) thread.join(); } @@ -117,7 +118,7 @@ BackgroundProcessingPool::~BackgroundProcessingPool() } -void BackgroundProcessingPool::threadFunction() +void BackgroundProcessingPool::workLoopFunc() { setThreadName(thread_name); @@ -137,80 +138,82 @@ void BackgroundProcessingPool::threadFunction() } SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); - if (auto * memory_tracker = CurrentThread::getMemoryTracker()) + if (auto * const memory_tracker = CurrentThread::getMemoryTracker()) memory_tracker->setMetric(settings.memory_metric); pcg64 rng(randomSeed()); std::this_thread::sleep_for(std::chrono::duration(std::uniform_real_distribution(0, settings.thread_sleep_seconds_random_part)(rng))); - while (!shutdown) + Poco::Timestamp scheduled_task_start_time; + + while (true) { TaskResult task_result = TaskResult::ERROR; TaskHandle task; - try { - Poco::Timestamp min_time; + std::unique_lock lock(tasks_mutex); + while (!task && !shutdown) { - std::unique_lock lock(tasks_mutex); - - if (!tasks.empty()) + for (const auto & [time, handle] : tasks) { - for (const auto & time_handle : tasks) + if (!handle->removed + && (handle->allow_execute_in_parallel || handle->concurrent_executors == 0)) { - if (!time_handle.second->removed) - { - min_time = time_handle.first; - task = time_handle.second; - break; - } + task = handle; + scheduled_task_start_time = time; + ++task->concurrent_executors; + break; } } + + if (task) + { + Poco::Timestamp current_time; + + if 
(scheduled_task_start_time <= current_time) + continue; + + wake_event.wait_for(lock, + std::chrono::microseconds(scheduled_task_start_time - current_time + + std::uniform_int_distribution(0, settings.thread_sleep_seconds_random_part * 1000000)(rng))); + } + else + { + wake_event.wait_for(lock, + std::chrono::duration(settings.thread_sleep_seconds + + std::uniform_real_distribution(0, settings.thread_sleep_seconds_random_part)(rng))); + } } if (shutdown) break; + } - if (!task) - { - std::unique_lock lock(tasks_mutex); - wake_event.wait_for(lock, - std::chrono::duration(settings.thread_sleep_seconds - + std::uniform_real_distribution(0, settings.thread_sleep_seconds_random_part)(rng))); - continue; - } + std::shared_lock rlock(task->rwlock); - /// No tasks ready for execution. - Poco::Timestamp current_time; - if (min_time > current_time) - { - std::unique_lock lock(tasks_mutex); - wake_event.wait_for(lock, std::chrono::microseconds( - min_time - current_time + std::uniform_int_distribution(0, settings.thread_sleep_seconds_random_part * 1000000)(rng))); - } + if (task->removed) + continue; - std::shared_lock rlock(task->rwlock); - - if (task->removed) - continue; - - { - CurrentMetrics::Increment metric_increment{settings.tasks_metric}; - task_result = task->function(); - } + try + { + CurrentMetrics::Increment metric_increment{settings.tasks_metric}; + task_result = task->task_function(); } catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); } - if (shutdown) - break; - { std::unique_lock lock(tasks_mutex); + if (shutdown) + break; + + --task->concurrent_executors; + if (task->removed) continue; @@ -231,8 +234,7 @@ void BackgroundProcessingPool::threadFunction() else if (task_result == TaskResult::NOTHING_TO_DO) next_time_to_execute += 1000000 * settings.thread_sleep_seconds_if_nothing_to_do; - tasks.erase(task->iterator); - task->iterator = tasks.emplace(next_time_to_execute, task); + rescheduleTask(task->iterator, next_time_to_execute); } } } diff --git a/src/Storages/MergeTree/BackgroundProcessingPool.h b/src/Storages/MergeTree/BackgroundProcessingPool.h index 526cab0800e..8bed696ab2c 100644 --- a/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -85,11 +85,13 @@ public: /// Create task and start it. TaskHandle addTask(const Task & task); + /// The following two methods are invoked by Storage*MergeTree at startup /// Create task but not start it. TaskHandle createTask(const Task & task); /// Start the task that was created but not started. Precondition: task was not started. - void startTask(const TaskHandle & task); + void startTask(const TaskHandle & task, bool allow_execute_in_parallel = true); + /// Invoked by Storage*MergeTree at shutdown void removeTask(const TaskHandle & task); ~BackgroundProcessingPool(); @@ -109,13 +111,20 @@ protected: Threads threads; - std::atomic shutdown {false}; + bool shutdown{false}; std::condition_variable wake_event; /// Thread group used for profiling purposes ThreadGroupStatusPtr thread_group; - void threadFunction(); + void workLoopFunc(); + + void rescheduleTask(Tasks::iterator & task_it, const Poco::Timestamp & new_scheduled_ts) + { + auto node_handle = tasks.extract(task_it); + node_handle.key() = new_scheduled_ts; + task_it = tasks.insert(std::move(node_handle)); + } private: PoolSettings settings; @@ -125,23 +134,29 @@ private: class BackgroundProcessingPoolTaskInfo { public: - /// Wake up any thread. 
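The rescheduleTask() helper added above leans on the C++17 node-handle API of the ordered containers: a task is rescheduled by extracting its node from the tasks multimap, rewriting the key (the scheduled timestamp) and reinserting the same node, so the element is never copied and the iterator returned by insert() is written back into the task. A minimal, self-contained sketch of that idiom, with generic key/value types chosen purely for illustration (it is not part of the patch):

#include <cassert>
#include <map>
#include <string>

int main()
{
    std::multimap<int, std::string> tasks;
    auto it = tasks.emplace(10, "merge");   // task scheduled at "time" 10

    // Reschedule to "time" 42: extract the node, rewrite its key, reinsert it.
    // The mapped value is neither copied nor moved, and the refreshed iterator
    // can be stored back, which is what rescheduleTask() does with task->iterator.
    auto node = tasks.extract(it);
    node.key() = 42;
    it = tasks.insert(std::move(node));

    assert(it->first == 42 && it->second == "merge");
    return 0;
}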
- void wake(); + /// Signals random idle thread from the pool that this task is ready to be executed. + void signalReadyToRun(); BackgroundProcessingPoolTaskInfo(BackgroundProcessingPool & pool_, const BackgroundProcessingPool::Task & function_) - : pool(pool_), function(function_) {} + : pool(pool_), task_function(function_) {} protected: friend class BackgroundProcessingPool; BackgroundProcessingPool & pool; - BackgroundProcessingPool::Task function; + BackgroundProcessingPool::Task task_function; - /// Read lock is hold when task is executed. + /// Read lock is held while task is being executed. + /// Write lock is used for stopping BGProcPool std::shared_mutex rwlock; + + bool allow_execute_in_parallel = false; + size_t concurrent_executors = 0; + + /// Signals that this task must no longer be planned for execution and is about to be removed std::atomic removed {false}; - std::multimap>::iterator iterator; + BackgroundProcessingPool::Tasks::iterator iterator; /// For exponential backoff. size_t count_no_work_done = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index d98457f7f4b..f5ca0fee070 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -107,6 +107,7 @@ public: virtual ~IMergeTreeDataPart(); using ColumnToSize = std::map; + /// Populates columns_to_size map (compressed size). void accumulateColumnSizes(ColumnToSize & /* column_to_size */) const; Type getType() const { return part_type; } @@ -117,6 +118,7 @@ public: const NamesAndTypesList & getColumns() const { return columns; } + /// Throws an exception if part is not stored in on-disk format. void assertOnDisk() const; void remove() const; @@ -161,6 +163,8 @@ public: VolumePtr volume; + /// A directory path (relative to storage's path) where part data is actually stored + /// Examples: 'detached/tmp_fetch_', 'tmp_', '' mutable String relative_path; MergeTreeIndexGranularityInfo index_granularity_info; @@ -290,10 +294,21 @@ public: void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } size_t getFileSizeOrZero(const String & file_name) const; + + /// Returns path to part dir relatively to disk mount point String getFullRelativePath() const; + + /// Returns full path to part dir String getFullPath() const; - void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = false) const; + + /// Makes checks and move part to new directory + /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly + void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = true) const; + + /// Moves a part to detached/ directory and adds prefix to its name void renameToDetached(const String & prefix) const; + + /// Makes clone of a part in detached/ directory via hard links void makeCloneInDetached(const String & prefix) const; /// Makes full clone of part in detached/ on another disk @@ -306,6 +321,7 @@ public: /// storage and pass it to this method. 
virtual bool hasColumnFiles(const String & /* column */, const IDataType & /* type */) const{ return false; } + /// Calculate the total size of the entire directory with all the files static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk_, const String & from); void calculateColumnsSizesOnDisk(); @@ -358,6 +374,7 @@ private: void loadPartitionAndMinMaxIndex(); + /// Generate unique path to detach part String getRelativePathForDetachedPart(const String & prefix) const; }; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index ee381709dd4..dad73b6a003 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -849,7 +849,7 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont || const_value.getType() == Field::Types::Float64) { /// Zero in all types is represented in memory the same way as in UInt64. - out.function = const_value.get() + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index be3caf98ad4..b6376dd3779 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -28,7 +28,7 @@ void MergeTreeBlockOutputStream::write(const Block & block) /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. if (storage.merging_mutating_task_handle) - storage.merging_mutating_task_handle->wake(); + storage.merging_mutating_task_handle->signalReadyToRun(); } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 63d163a593e..b399584f4d9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -1866,7 +1865,7 @@ void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVect part->remove_time.store(remove_time, std::memory_order_relaxed); if (part->state != IMergeTreeDataPart::State::Outdated) - modifyPartState(part,IMergeTreeDataPart::State::Outdated); + modifyPartState(part, IMergeTreeDataPart::State::Outdated); } } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 1e81bb6d0a8..c1fc8184206 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -637,9 +637,9 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( reader_settings, result_projection); } - else if (settings.optimize_read_in_order && query_info.input_sorting_info) + else if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && query_info.input_order_info) { - size_t prefix_size = query_info.input_sorting_info->order_key_prefix_descr.size(); + size_t prefix_size = query_info.input_order_info->order_key_prefix_descr.size(); auto order_key_prefix_ast = data.getSortingKey().expression_list_ast->clone(); order_key_prefix_ast->children.resize(prefix_size); @@ -855,7 +855,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( ExpressionActionsPtr & out_projection) const { size_t sum_marks = 0; - const InputSortingInfoPtr & input_sorting_info = query_info.input_sorting_info; + const InputOrderInfoPtr & input_order_info = query_info.input_order_info; + size_t 
adaptive_parts = 0; std::vector sum_marks_in_parts(parts.size()); const auto data_settings = data.getSettings(); @@ -998,10 +999,9 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( } parts.emplace_back(part); } + ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction); - ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_sorting_info->direction); - - if (input_sorting_info->direction == 1) + if (input_order_info->direction == 1) { pipes.emplace_back(std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, @@ -1024,9 +1024,9 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( if (pipes.size() > 1) { SortDescription sort_description; - for (size_t j = 0; j < input_sorting_info->order_key_prefix_descr.size(); ++j) + for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) sort_description.emplace_back(data.getSortingKey().column_names[j], - input_sorting_info->direction, 1); + input_order_info->direction, 1); /// Drop temporary columns, added by 'sorting_key_prefix_expr' out_projection = createProjection(pipes.back(), data); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 4ea7ddda738..a6dec4816bf 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -559,7 +559,7 @@ void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, C } if (storage.queue_task_handle) - storage.queue_task_handle->wake(); + storage.queue_task_handle->signalReadyToRun(); } } @@ -641,7 +641,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, C } if (some_active_mutations_were_killed) - storage.queue_task_handle->wake(); + storage.queue_task_handle->signalReadyToRun(); if (!entries_to_load.empty()) { @@ -754,7 +754,7 @@ ReplicatedMergeTreeMutationEntryPtr ReplicatedMergeTreeQueue::removeMutation( } if (mutation_was_active) - storage.queue_task_handle->wake(); + storage.queue_task_handle->signalReadyToRun(); return entry; } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 26a02f8904c..e08ea1739a5 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -205,121 +205,32 @@ static void setGraphitePatternsFromConfig(const Context & context, } -static String getMergeTreeVerboseHelp(bool is_extended_syntax) +static String getMergeTreeVerboseHelp(bool) { using namespace std::string_literals; String help = R"( -MergeTree is a family of storage engines. +Syntax for the MergeTree table engine: -MergeTrees are different in two ways: -- they may be replicated and non-replicated; -- they may do different actions on merge: nothing; sign collapse; sum; apply aggregete functions. +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + ... + INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, + INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2 +) ENGINE = MergeTree() +ORDER BY expr +[PARTITION BY expr] +[PRIMARY KEY expr] +[SAMPLE BY expr] +[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] +[SETTINGS name=value, ...] 
-So we have 14 combinations: - MergeTree, CollapsingMergeTree, SummingMergeTree, AggregatingMergeTree, ReplacingMergeTree, GraphiteMergeTree, VersionedCollapsingMergeTree - ReplicatedMergeTree, ReplicatedCollapsingMergeTree, ReplicatedSummingMergeTree, ReplicatedAggregatingMergeTree, ReplicatedReplacingMergeTree, ReplicatedGraphiteMergeTree, ReplicatedVersionedCollapsingMergeTree +See details in documentation: https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/. Other engines of the family support different syntax, see details in the corresponding documentation topics. -In most of cases, you need MergeTree or ReplicatedMergeTree. - -For replicated merge trees, you need to supply a path in ZooKeeper and a replica name as the first two parameters. -Path in ZooKeeper is like '/clickhouse/tables/01/' where /clickhouse/tables/ is a common prefix and 01 is a shard name. -Replica name is like 'mtstat01-1' - it may be the hostname or any suitable string identifying replica. -You may use macro substitutions for these parameters. It's like ReplicatedMergeTree('/clickhouse/tables/{shard}/', '{replica}'... -Look at the section in server configuration file. -)"; - - if (!is_extended_syntax) - help += R"( -Next parameter (which is the first for unreplicated tables and the third for replicated tables) is the name of date column. -Date column must exist in the table and have type Date (not DateTime). -It is used for internal data partitioning and works like some kind of index. - -If your source data doesn't have a column of type Date, but has a DateTime column, you may add values for Date column while loading, - or you may INSERT your source data to a table of type Log and then transform it with INSERT INTO t SELECT toDate(time) AS date, * FROM ... -If your source data doesn't have any date or time, you may just pass any constant for a date column while loading. - -Next parameter is optional sampling expression. Sampling expression is used to implement SAMPLE clause in query for approximate query execution. -If you don't need approximate query execution, simply omit this parameter. -Sample expression must be one of the elements of the primary key tuple. For example, if your primary key is (CounterID, EventDate, intHash64(UserID)), your sampling expression might be intHash64(UserID). - -Next parameter is the primary key tuple. It's like (CounterID, EventDate, intHash64(UserID)) - a list of column names or functional expressions in round brackets. If your primary key has just one element, you may omit round brackets. - -Careful choice of the primary key is extremely important for processing short-time queries. - -Next parameter is index (primary key) granularity. Good value is 8192. You have no reasons to use any other value. -)"; - - help += R"( -For the Collapsing mode, the )" + (is_extended_syntax ? "only"s : "last"s) + R"( parameter is the name of a sign column - a special column that is used to 'collapse' rows with the same primary key while merging. - -For the Summing mode, the optional )" + (is_extended_syntax ? ""s : "last "s) + R"(parameter is a list of columns to sum while merging. This list is passed in round brackets, like (PageViews, Cost). -If this parameter is omitted, the storage will sum all numeric columns except columns participating in the primary key. - -For the Replacing mode, the optional )" + (is_extended_syntax ? ""s : "last "s) + R"(parameter is the name of a 'version' column. 
While merging, for all rows with the same primary key, only one row is selected: the last row, if the version column was not specified, or the last row with the maximum version value, if specified. - -For VersionedCollapsing mode, the )" + (is_extended_syntax ? ""s : "last "s) + R"(2 parameters are the name of a sign column and the name of a 'version' column. Version column must be in primary key. While merging, a pair of rows with the same primary key and different sign may collapse. -)"; - - if (is_extended_syntax) - help += R"( -You can specify a partitioning expression in the PARTITION BY clause. It is optional but highly recommended. -A common partitioning expression is some function of the event date column e.g. PARTITION BY toYYYYMM(EventDate) will partition the table by month. -Rows with different partition expression values are never merged together. That allows manipulating partitions with ALTER commands. -Also it acts as a kind of index. - -Sorting key is specified in the ORDER BY clause. It is mandatory for all MergeTree types. -It is like (CounterID, EventDate, intHash64(UserID)) - a list of column names or functional expressions -in round brackets. -If your sorting key has just one element, you may omit round brackets. - -By default primary key is equal to the sorting key. You can specify a primary key that is a prefix of the -sorting key in the PRIMARY KEY clause. - -Careful choice of the primary key is extremely important for processing short-time queries. - -Optional sampling expression can be specified in the SAMPLE BY clause. It is used to implement the SAMPLE clause in a SELECT query for approximate query execution. -Sampling expression must be one of the elements of the primary key tuple. For example, if your primary key is (CounterID, EventDate, intHash64(UserID)), your sampling expression might be intHash64(UserID). - -Engine settings can be specified in the SETTINGS clause. Full list is in the source code in the 'src/Storages/MergeTree/MergeTreeSettings.h' file. -E.g. you can specify the index (primary key) granularity with SETTINGS index_granularity = 8192. 
- -Examples: - -MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity = 8192 - -MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) SAMPLE BY intHash32(UserID) - -MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) PRIMARY KEY (CounterID, EventDate) SAMPLE BY intHash32(UserID) - -CollapsingMergeTree(Sign) PARTITION BY StartDate SAMPLE BY intHash32(UserID) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) - -SummingMergeTree PARTITION BY toMonday(EventDate) ORDER BY (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo) - -SummingMergeTree((Shows, Clicks, Cost, CostCur, ShowsSumPosition, ClicksSumPosition, SessionNum, SessionLen, SessionCost, GoalsNum, SessionDepth)) PARTITION BY toYYYYMM(EventDate) ORDER BY (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo) - -ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}') PARTITION BY EventDate ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) SAMPLE BY intHash32(UserID) -)"; - else - help += R"( -Examples: - -MergeTree(EventDate, (CounterID, EventDate), 8192) - -MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192) - -CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign) - -SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192) - -SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192, (Shows, Clicks, Cost, CostCur, ShowsSumPosition, ClicksSumPosition, SessionNum, SessionLen, SessionCost, GoalsNum, SessionDepth)) - -ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192) -)"; - - help += R"( -For further info please read the documentation: https://clickhouse.yandex/ +If you use the Replicated version of engines, see https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/. 
)"; return help; diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 5bbe5be9928..bfdbd7ef557 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -30,7 +30,7 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( forbidden_columns.insert(elem.first); } -InputSortingInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const +InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const { Names sorting_key_columns; if (const auto * merge_tree = dynamic_cast(storage.get())) @@ -122,7 +122,7 @@ InputSortingInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & stora if (order_key_prefix_descr.empty()) return {}; - return std::make_shared(std::move(order_key_prefix_descr), read_direction); + return std::make_shared(std::move(order_key_prefix_descr), read_direction); } } diff --git a/src/Storages/ReadInOrderOptimizer.h b/src/Storages/ReadInOrderOptimizer.h index 8416d23a912..de858e8fd92 100644 --- a/src/Storages/ReadInOrderOptimizer.h +++ b/src/Storages/ReadInOrderOptimizer.h @@ -20,10 +20,10 @@ public: const SortDescription & required_sort_description, const SyntaxAnalyzerResultPtr & syntax_result); - InputSortingInfoPtr getInputOrder(const StoragePtr & storage) const; + InputOrderInfoPtr getInputOrder(const StoragePtr & storage) const; private: - /// Actions for every element of order expression to analyze functions for monotonicicy + /// Actions for every element of order expression to analyze functions for monotonicity ManyExpressionActions elements_actions; NameSet forbidden_columns; SortDescription required_sort_description; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 84cf3a32aa1..c4cd1035ea7 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -35,25 +36,25 @@ struct FilterInfo bool do_remove_column = false; }; -struct InputSortingInfo +struct InputOrderInfo { SortDescription order_key_prefix_descr; int direction; - InputSortingInfo(const SortDescription & order_key_prefix_descr_, int direction_) + InputOrderInfo(const SortDescription & order_key_prefix_descr_, int direction_) : order_key_prefix_descr(order_key_prefix_descr_), direction(direction_) {} - bool operator ==(const InputSortingInfo & other) const + bool operator ==(const InputOrderInfo & other) const { return order_key_prefix_descr == other.order_key_prefix_descr && direction == other.direction; } - bool operator !=(const InputSortingInfo & other) const { return !(*this == other); } + bool operator !=(const InputOrderInfo & other) const { return !(*this == other); } }; using PrewhereInfoPtr = std::shared_ptr; using FilterInfoPtr = std::shared_ptr; -using InputSortingInfoPtr = std::shared_ptr; +using InputOrderInfoPtr = std::shared_ptr; struct SyntaxAnalyzerResult; using SyntaxAnalyzerResultPtr = std::shared_ptr; @@ -61,6 +62,7 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr; class ReadInOrderOptimizer; using ReadInOrderOptimizerPtr = std::shared_ptr; + /** Query along with some additional data, * that can be used during query processing * inside storage engines. 
@@ -73,9 +75,9 @@ struct SelectQueryInfo PrewhereInfoPtr prewhere_info; - ReadInOrderOptimizerPtr order_by_optimizer; + ReadInOrderOptimizerPtr order_optimizer; /// We can modify it while reading from storage - mutable InputSortingInfoPtr input_sorting_info; + mutable InputOrderInfoPtr input_order_info; /// Prepared sets are used for indices by storage engine. /// Example: x IN (1, 2, 3) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 77f2ba1c288..2d8c3fd9a2f 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -171,8 +171,8 @@ Pipes StorageBuffer::read( if (dst_has_same_structure) { - if (query_info.order_by_optimizer) - query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(destination); + if (query_info.order_optimizer) + query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination); /// The destination table has the same structure of the requested columns and we can simply read blocks from there. pipes_from_dst = destination->read(column_names, query_info, context, processed_stage, max_block_size, num_streams); @@ -450,7 +450,6 @@ void StorageBuffer::startup() LOG_WARNING(log, "Storage {} is run with readonly settings, it will not be able to insert data. Set appropriate system_profile to fix this.", getName()); } - flush_handle = bg_pool.createTask(log->name() + "/Bg", [this]{ flushBack(); }); flush_handle->activateAndSchedule(); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 2b10311d8cf..d80fee1e4dc 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -649,13 +648,23 @@ StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor( auto & node_data = cluster_nodes_data[key]; if (!node_data.directory_monitor) { - node_data.conneciton_pool = StorageDistributedDirectoryMonitor::createPool(name, *this); + node_data.connection_pool = StorageDistributedDirectoryMonitor::createPool(name, *this); node_data.directory_monitor = std::make_unique( - *this, path, node_data.conneciton_pool, monitors_blocker, global_context->getDistributedSchedulePool()); + *this, path, node_data.connection_pool, monitors_blocker, global_context->getDistributedSchedulePool()); } return *node_data.directory_monitor; } +std::vector StorageDistributed::getDirectoryMonitorsStatuses() const +{ + std::vector statuses; + std::lock_guard lock(cluster_nodes_mutex); + statuses.reserve(cluster_nodes_data.size()); + for (const auto & node : cluster_nodes_data) + statuses.push_back(node.second.directory_monitor->getStatus()); + return statuses; +} + size_t StorageDistributed::getShardCount() const { return getCluster()->getShardCount(); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index a7e3a073af4..4067012c449 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -17,7 +18,6 @@ namespace DB { class Context; -class StorageDistributedDirectoryMonitor; class VolumeJBOD; using VolumeJBODPtr = std::shared_ptr; @@ -107,6 +107,9 @@ public: void createDirectoryMonitors(const std::string & disk); /// ensure directory monitor thread and connectoin pool creation by disk and subdirectory name StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const std::string & disk, const std::string & name); + /// 
Return list of metrics for all created monitors + /// (note that monitors are created lazily, i.e. until at least one INSERT executed) + std::vector getDirectoryMonitorsStatuses() const; void flushClusterNodesAllData(); @@ -181,13 +184,13 @@ protected: struct ClusterNodeData { std::unique_ptr directory_monitor; - ConnectionPoolPtr conneciton_pool; + ConnectionPoolPtr connection_pool; void flushAllData() const; void shutdownAndDropAllData() const; }; std::unordered_map cluster_nodes_data; - std::mutex cluster_nodes_mutex; + mutable std::mutex cluster_nodes_mutex; }; diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 562ea5c9486..352e0a43f39 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -28,6 +27,7 @@ #include #include + namespace DB { namespace ErrorCodes diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 1b8c6acb49f..f7233d67eca 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -180,8 +180,8 @@ Pipes StorageMaterializedView::read( auto lock = storage->lockStructureForShare( false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - if (query_info.order_by_optimizer) - query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage); + if (query_info.order_optimizer) + query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); Pipes pipes = storage->read(column_names, query_info, context, processed_stage, max_block_size, num_streams); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index baf54a1d50a..64d0f11f853 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -172,12 +172,12 @@ Pipes StorageMerge::read( num_streams *= num_streams_multiplier; size_t remaining_streams = num_streams; - InputSortingInfoPtr input_sorting_info; - if (query_info.order_by_optimizer) + InputOrderInfoPtr input_sorting_info; + if (query_info.order_optimizer) { for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it) { - auto current_info = query_info.order_by_optimizer->getInputOrder(std::get<0>(*it)); + auto current_info = query_info.order_optimizer->getInputOrder(std::get<0>(*it)); if (it == selected_tables.begin()) input_sorting_info = current_info; else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info)) @@ -187,7 +187,7 @@ Pipes StorageMerge::read( break; } - query_info.input_sorting_info = input_sorting_info; + query_info.input_order_info = input_sorting_info; } for (const auto & table : selected_tables) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 47b1c3d2837..15e662b27b5 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -95,16 +95,36 @@ void StorageMergeTree::startup() /// NOTE background task will also do the above cleanups periodically. time_after_previous_cleanup.restart(); - auto & merge_pool = global_context.getBackgroundPool(); - merging_mutating_task_handle = merge_pool.createTask([this] { return mergeMutateTask(); }); - /// Ensure that thread started only after assignment to 'merging_mutating_task_handle' is done. 
-    merge_pool.startTask(merging_mutating_task_handle);
-
-    if (areBackgroundMovesNeeded())
+    try
     {
-        auto & move_pool = global_context.getBackgroundMovePool();
-        moving_task_handle = move_pool.createTask([this] { return movePartsTask(); });
-        move_pool.startTask(moving_task_handle);
+        auto & merge_pool = global_context.getBackgroundPool();
+        merging_mutating_task_handle = merge_pool.createTask([this] { return mergeMutateTask(); });
+        /// Ensure that thread started only after assignment to 'merging_mutating_task_handle' is done.
+        merge_pool.startTask(merging_mutating_task_handle);
+
+        if (areBackgroundMovesNeeded())
+        {
+            auto & move_pool = global_context.getBackgroundMovePool();
+            moving_task_handle = move_pool.createTask([this] { return movePartsTask(); });
+            move_pool.startTask(moving_task_handle);
+        }
+    }
+    catch (...)
+    {
+        /// Exception safety: failed "startup" does not require a call to "shutdown" from the caller.
+        /// And it should be able to safely destroy the table after an exception in the "startup" method.
+        /// It means that failed "startup" must not create any background tasks that we will have to wait for.
+        try
+        {
+            shutdown();
+        }
+        catch (...)
+        {
+            std::terminate();
+        }
+
+        /// Note: after failed "startup", the table will be in a state that only allows to destroy the object.
+        throw;
     }
 }

@@ -121,16 +141,6 @@ void StorageMergeTree::shutdown()
         mutation_wait_event.notify_all();
     }

-    try
-    {
-        clearOldPartsFromFilesystem(true);
-    }
-    catch (...)
-    {
-        /// Example: the case of readonly filesystem, we have failure removing old parts.
-        /// Should not prevent table shutdown.
-        tryLogCurrentException(log);
-    }

     merger_mutator.merges_blocker.cancelForever();
     parts_mover.moves_blocker.cancelForever();

@@ -140,6 +150,23 @@
     if (moving_task_handle)
         global_context.getBackgroundMovePool().removeTask(moving_task_handle);
+
+
+    try
+    {
+        /// We clear all old parts after stopping all background operations.
+        /// It's important, because background operations can produce temporary
+        /// parts which will remove themselves in their destructors. If so, we
+        /// may have a race condition between our remove call and the background
+        /// process.
+        clearOldPartsFromFilesystem(true);
+    }
+    catch (...)
+    {
+        /// Example: the case of a readonly filesystem, where removing old parts fails.
+        /// Should not prevent table shutdown.
+        tryLogCurrentException(log);
+    }
 }


@@ -390,7 +417,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, String
     current_mutations_by_version.emplace(version, insertion.first->second);

     LOG_INFO(log, "Added mutation: {}", mutation_file_name);
-    merging_mutating_task_handle->wake();
+    merging_mutating_task_handle->signalReadyToRun();
     return version;
 }

@@ -523,7 +550,7 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id)
     }

     /// Maybe there is another mutation that was blocked by the killed one. Try to execute it immediately.
-    merging_mutating_task_handle->wake();
+    merging_mutating_task_handle->signalReadyToRun();

     return CancellationCode::CancelSent;
 }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 95e36ef24ad..d109fa464b0 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -2907,45 +2907,60 @@ void StorageReplicatedMergeTree::startup()
     if (is_readonly)
         return;

-    queue.initialize(
-        zookeeper_path, replica_path,
-        getStorageID().getFullTableName() + " (ReplicatedMergeTreeQueue)",
-        getDataParts());
-
-    data_parts_exchange_endpoint = std::make_shared(*this);
-    global_context.getInterserverIOHandler().addEndpoint(data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint);
-
-    /// In this thread replica will be activated.
-    restarting_thread.start();
-
-    /// Wait while restarting_thread initializes LeaderElection (and so on) or makes first attmept to do it
-    startup_event.wait();
-
-    /// If we don't separate create/start steps, race condition will happen
-    /// between the assignment of queue_task_handle and queueTask that use the queue_task_handle.
+    try
     {
-        auto lock = queue.lockQueue();
-        auto & pool = global_context.getBackgroundPool();
-        queue_task_handle = pool.createTask([this] { return queueTask(); });
-        pool.startTask(queue_task_handle);
-    }
+        queue.initialize(
+            zookeeper_path, replica_path,
+            getStorageID().getFullTableName() + " (ReplicatedMergeTreeQueue)",
+            getDataParts());

-    if (areBackgroundMovesNeeded())
-    {
-        auto & pool = global_context.getBackgroundMovePool();
-        move_parts_task_handle = pool.createTask([this] { return movePartsTask(); });
-        pool.startTask(move_parts_task_handle);
+        data_parts_exchange_endpoint = std::make_shared(*this);
+        global_context.getInterserverIOHandler().addEndpoint(data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint);
+
+        /// In this thread replica will be activated.
+        restarting_thread.start();
+
+        /// Wait while restarting_thread initializes LeaderElection (and so on) or makes the first attempt to do it
+        startup_event.wait();
+
+        /// If we don't separate create/start steps, a race condition will happen
+        /// between the assignment of queue_task_handle and queueTask that uses the queue_task_handle.
+        {
+            auto lock = queue.lockQueue();
+            auto & pool = global_context.getBackgroundPool();
+            queue_task_handle = pool.createTask([this] { return queueTask(); });
+            pool.startTask(queue_task_handle);
+        }
+
+        if (areBackgroundMovesNeeded())
+        {
+            auto & pool = global_context.getBackgroundMovePool();
+            move_parts_task_handle = pool.createTask([this] { return movePartsTask(); });
+            pool.startTask(move_parts_task_handle);
+        }
+    }
+    catch (...)
+    {
+        /// Exception safety: failed "startup" does not require a call to "shutdown" from the caller.
+        /// And it should be able to safely destroy the table after an exception in the "startup" method.
+        /// It means that failed "startup" must not create any background tasks that we will have to wait for.
+        try
+        {
+            shutdown();
+        }
+        catch (...)
+        {
+            std::terminate();
+        }
+
+        /// Note: after failed "startup", the table will be in a state that only allows to destroy the object.
+        throw;
     }
-    need_shutdown.store(true);
 }


 void StorageReplicatedMergeTree::shutdown()
 {
-    if (!need_shutdown.load())
-        return;
-
-    clearOldPartsFromFilesystem(true);
     /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP.
     fetcher.blocker.cancelForever();
     merger_mutator.merges_blocker.cancelForever();

@@ -2981,7 +2996,12 @@
         std::unique_lock lock(data_parts_exchange_endpoint->rwlock);
     }
     data_parts_exchange_endpoint.reset();
-    need_shutdown.store(false);
+
+    /// We clear all old parts after stopping all background operations. It's
+    /// important, because background operations can produce temporary parts
+    /// which will remove themselves in their destructors. If so, we may have
+    /// a race condition between our remove call and the background process.
+    clearOldPartsFromFilesystem(true);
 }


@@ -5308,7 +5328,7 @@ bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UI
     queue.pullLogsToQueue(getZooKeeper());
     /// This is significant, because the execution of this task could be delayed at BackgroundPool.
     /// And we force it to be executed.
-    queue_task_handle->wake();
+    queue_task_handle->signalReadyToRun();

     Poco::Event target_size_event;
     auto callback = [&target_size_event, queue_size] (size_t new_queue_size)
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index f01e51bd769..b82b387a623 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -288,8 +288,6 @@ private:
     /// True if replica was created for existing table with fixed granularity
     bool other_replicas_fixed_granularity = false;

-    std::atomic_bool need_shutdown{false};
-
     template
     void foreachCommittedParts(const Func & func) const;

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index e8fd89c4505..397d064ba15 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -23,6 +24,7 @@
 #include

+#include
 #include
 #include

@@ -33,6 +35,8 @@
 #include
 #include

+#include
+
 namespace DB
 {

@@ -200,18 +204,24 @@ StorageS3::StorageS3(
     , format_name(format_name_)
     , min_upload_part_size(min_upload_part_size_)
     , compression_method(compression_method_)
-    , client(S3::ClientFactory::instance().create(uri_.endpoint, uri_.is_virtual_hosted_style, access_key_id_, secret_access_key_))
 {
     context_global.getRemoteHostFilter().checkURL(uri_.uri);
     setColumns(columns_);
     setConstraints(constraints_);
+
+    auto settings = context_.getStorageS3Settings().getSettings(uri.endpoint);
+    Aws::Auth::AWSCredentials credentials(access_key_id_, secret_access_key_);
+    if (access_key_id_.empty())
+        credentials = Aws::Auth::AWSCredentials(std::move(settings.access_key_id), std::move(settings.secret_access_key));
+
+    client = S3::ClientFactory::instance().create(
+        uri_.endpoint, uri_.is_virtual_hosted_style, access_key_id_, secret_access_key_, std::move(settings.headers));
 }

 namespace
 {
-
-/* "Recursive" directory listing with matched paths as a result.
+    /* "Recursive" directory listing with matched paths as a result.
 * Have the same method in StorageFile.
*/ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & globbed_uri) @@ -241,11 +251,17 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & outcome = client.ListObjectsV2(request); if (!outcome.IsSuccess()) { - throw Exception("Could not list objects in bucket " + quoteString(request.GetBucket()) - + " with prefix " + quoteString(request.GetPrefix()) - + ", page " + std::to_string(page) - + ", S3 exception " + outcome.GetError().GetExceptionName() + " " + outcome.GetError().GetMessage() - , ErrorCodes::S3_ERROR); + std::ostringstream message; + message << "Could not list objects in bucket " << quoteString(request.GetBucket()) + << " with prefix " << quoteString(request.GetPrefix()); + + if (page > 1) + message << ", page " << std::to_string(page); + + message << ", S3 exception: " + backQuote(outcome.GetError().GetExceptionName()) + << ", message: " + quoteString(outcome.GetError().GetMessage()); + + throw Exception(message.str(), ErrorCodes::S3_ERROR); } for (const auto & row : outcome.GetResult().GetContents()) diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp new file mode 100644 index 00000000000..5b443de6b9a --- /dev/null +++ b/src/Storages/StorageS3Settings.cpp @@ -0,0 +1,57 @@ +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int INVALID_CONFIG_PARAMETER; +} + +void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) +{ + std::lock_guard lock(mutex); + settings.clear(); + if (!config.has(config_elem)) + return; + + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(config_elem, config_keys); + + for (const String & key : config_keys) + { + auto endpoint = config.getString(config_elem + "." + key + ".endpoint"); + auto access_key_id = config.getString(config_elem + "." + key + ".access_key_id", ""); + auto secret_access_key = config.getString(config_elem + "." + key + ".secret_access_key", ""); + + HeaderCollection headers; + Poco::Util::AbstractConfiguration::Keys subconfig_keys; + config.keys(config_elem + "." + key, subconfig_keys); + for (const String & subkey : subconfig_keys) + { + if (subkey.starts_with("header")) + { + auto header_str = config.getString(config_elem + "." + key + "." 
+ subkey); + auto delimiter = header_str.find(':'); + if (delimiter == String::npos) + throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); + headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)}); + } + } + + settings.emplace(endpoint, S3AuthSettings{std::move(access_key_id), std::move(secret_access_key), std::move(headers)}); + } +} + +S3AuthSettings StorageS3Settings::getSettings(const String & endpoint) const +{ + std::lock_guard lock(mutex); + if (auto setting = settings.find(endpoint); setting != settings.end()) + return setting->second; + return {}; +} + +} diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h new file mode 100644 index 00000000000..ac31928a240 --- /dev/null +++ b/src/Storages/StorageS3Settings.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include +#include + +namespace Poco::Util +{ +class AbstractConfiguration; +} + +namespace DB +{ + +struct HttpHeader +{ + const String name; + const String value; +}; + +using HeaderCollection = std::vector; + +struct S3AuthSettings +{ + const String access_key_id; + const String secret_access_key; + + const HeaderCollection headers; +}; + +/// Settings for the StorageS3. +class StorageS3Settings +{ +public: + StorageS3Settings() = default; + void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); + + S3AuthSettings getSettings(const String & endpoint) const; + +private: + mutable std::mutex mutex; + std::map settings; +}; + +} diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index c6b37a50aa9..97403a359c3 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -124,7 +124,7 @@ ASTPtr StorageView::getRuntimeViewQuery(ASTSelectQuery * outer_query, const Cont /// TODO: remove getTableExpressions and getTablesWithColumns { const auto & table_expressions = getTableExpressions(*outer_query); - const auto & tables_with_columns = getDatabaseAndTablesWithColumnNames(table_expressions, context); + const auto & tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); replaceTableNameWithSubquery(outer_query, runtime_view_query); if (context.getSettingsRef().joined_subquery_requires_alias && tables_with_columns.size() > 1) diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 9f73c00d22b..97998e11ea5 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -44,14 +44,14 @@ const char * auto_config_build[] "USE_RDKAFKA", "@USE_RDKAFKA@", "USE_CAPNP", "@USE_CAPNP@", "USE_BASE64", "@USE_BASE64@", - "USE_XXHASH", "@USE_XXHASH@", + "USE_XXHASH", "@USE_INTERNAL_LZ4_LIBRARY@", "USE_HDFS", "@USE_HDFS@", "USE_SNAPPY", "@USE_SNAPPY@", "USE_PARQUET", "@USE_PARQUET@", "USE_PROTOBUF", "@USE_PROTOBUF@", "USE_BROTLI", "@USE_BROTLI@", "USE_SSL", "@USE_SSL@", - "USE_HYPERSCAN", "@USE_HYPERSCAN@", + "USE_HYPERSCAN", "@ENABLE_HYPERSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", "USE_GRPC", "@USE_GRPC@", diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp new file mode 100644 index 00000000000..2459be0ba71 --- /dev/null +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include +#include 
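For clarity, the per-endpoint header entries handled by StorageS3Settings::loadFromConfig() above are plain "Name: value" strings split at the first colon; everything after the colon, including any leading whitespace, becomes the header value. A standalone sketch of just that parsing rule (hypothetical helper name, not part of the patch):

#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>

// Splits one configured header entry ("Name: value") at the first ':',
// mirroring the find/substr logic in StorageS3Settings::loadFromConfig().
std::pair<std::string, std::string> parseHeaderEntry(const std::string & header_str)
{
    auto delimiter = header_str.find(':');
    if (delimiter == std::string::npos)
        throw std::runtime_error("Malformed s3 header value");
    return {header_str.substr(0, delimiter), header_str.substr(delimiter + 1)};
}

int main()
{
    auto [name, value] = parseHeaderEntry("X-Amz-Security-Token: abc123");
    std::cout << name << '|' << value << '\n';   // prints "X-Amz-Security-Token| abc123" (leading space kept)
    return 0;
}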
+#include +#include +#include +#include + + +namespace DB +{ + + +NamesAndTypesList StorageSystemDistributionQueue::getNamesAndTypes() +{ + return { + { "database", std::make_shared() }, + { "table", std::make_shared() }, + { "data_path", std::make_shared() }, + { "is_blocked", std::make_shared() }, + { "error_count", std::make_shared() }, + { "data_files", std::make_shared() }, + { "data_compressed_bytes", std::make_shared() }, + { "last_exception", std::make_shared() }, + }; +} + + +void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const +{ + const auto access = context.getAccess(); + const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + std::map> tables; + for (const auto & db : DatabaseCatalog::instance().getDatabases()) + { + /// Lazy database can not contain distributed tables + if (db.second->getEngineName() == "Lazy") + continue; + + const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); + + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) + { + StoragePtr table = iterator->table(); + if (!table) + continue; + + if (!dynamic_cast(table.get())) + continue; + if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, db.first, iterator->name())) + continue; + tables[db.first][iterator->name()] = table; + } + } + + + MutableColumnPtr col_database_mut = ColumnString::create(); + MutableColumnPtr col_table_mut = ColumnString::create(); + + for (auto & db : tables) + { + for (auto & table : db.second) + { + col_database_mut->insert(db.first); + col_table_mut->insert(table.first); + } + } + + ColumnPtr col_database_to_filter = std::move(col_database_mut); + ColumnPtr col_table_to_filter = std::move(col_table_mut); + + /// Determine what tables are needed by the conditions in the query. 
+ { + Block filtered_block + { + { col_database_to_filter, std::make_shared(), "database" }, + { col_table_to_filter, std::make_shared(), "table" }, + }; + + VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + + if (!filtered_block.rows()) + return; + + col_database_to_filter = filtered_block.getByName("database").column; + col_table_to_filter = filtered_block.getByName("table").column; + } + + for (size_t i = 0, tables_size = col_database_to_filter->size(); i < tables_size; ++i) + { + String database = (*col_database_to_filter)[i].safeGet(); + String table = (*col_table_to_filter)[i].safeGet(); + + auto & distributed_table = dynamic_cast(*tables[database][table]); + + for (const auto & status : distributed_table.getDirectoryMonitorsStatuses()) + { + size_t col_num = 0; + res_columns[col_num++]->insert(database); + res_columns[col_num++]->insert(table); + res_columns[col_num++]->insert(status.path); + res_columns[col_num++]->insert(status.is_blocked); + res_columns[col_num++]->insert(status.error_count); + res_columns[col_num++]->insert(status.files_count); + res_columns[col_num++]->insert(status.bytes_count); + + if (status.last_exception) + res_columns[col_num++]->insert(getExceptionMessage(status.last_exception, false)); + else + res_columns[col_num++]->insertDefault(); + } + } +} + +} diff --git a/src/Storages/System/StorageSystemDistributionQueue.h b/src/Storages/System/StorageSystemDistributionQueue.h new file mode 100644 index 00000000000..88e7fa45cf5 --- /dev/null +++ b/src/Storages/System/StorageSystemDistributionQueue.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class Context; + + +/** Implements the `distribution_queue` system table, which allows you to view the INSERT queues for the Distributed tables. 
+ */ +class StorageSystemDistributionQueue final : public ext::shared_ptr_helper, public IStorageSystemOneBlock +{ + friend struct ext::shared_ptr_helper; +public: + std::string getName() const override { return "SystemDistributionQueue"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 585eab2b4d8..2b52f0fe5cc 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -124,6 +125,7 @@ void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper) system_database.attachTable("mutations", StorageSystemMutations::create("mutations")); system_database.attachTable("replicas", StorageSystemReplicas::create("replicas")); system_database.attachTable("replication_queue", StorageSystemReplicationQueue::create("replication_queue")); + system_database.attachTable("distribution_queue", StorageSystemDistributionQueue::create("distribution_queue")); system_database.attachTable("dictionaries", StorageSystemDictionaries::create("dictionaries")); system_database.attachTable("models", StorageSystemModels::create("models")); system_database.attachTable("clusters", StorageSystemClusters::create("clusters")); diff --git a/src/Storages/tests/CMakeLists.txt b/src/Storages/tests/CMakeLists.txt index 80dd4c8419c..292f7603838 100644 --- a/src/Storages/tests/CMakeLists.txt +++ b/src/Storages/tests/CMakeLists.txt @@ -1,9 +1,3 @@ -add_executable (system_numbers system_numbers.cpp) -target_link_libraries (system_numbers PRIVATE dbms clickhouse_storages_system clickhouse_common_io) - -add_executable (storage_log storage_log.cpp) -target_link_libraries (storage_log PRIVATE dbms) - add_executable (part_name part_name.cpp) target_link_libraries (part_name PRIVATE dbms) @@ -23,7 +17,14 @@ add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_a target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) add_executable (transform_part_zk_nodes transform_part_zk_nodes.cpp) -target_link_libraries (transform_part_zk_nodes PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (transform_part_zk_nodes + PRIVATE + boost::program_options + clickhouse_common_config + clickhouse_common_zookeeper + dbms + string_utils +) if (ENABLE_FUZZING) add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.cpp) diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index bf86322a676..318d667d9b0 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -18,6 +18,8 @@ using namespace DB; /// NOTE How to do better? 
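For context on the system.distribution_queue table registered above: it exposes one row per directory monitor of each Distributed table, with the columns declared in getNamesAndTypes(). A minimal sketch of querying it from Python, not part of the patch — it assumes clickhouse-client is in PATH and a server with Distributed tables is running; the WHERE clause also illustrates the database/table pre-filtering that fillData() applies via filterBlockWithQuery before touching any storage:

import subprocess

query = (
    "SELECT database, table, data_path, is_blocked, error_count, "
    "data_files, data_compressed_bytes, last_exception "
    "FROM system.distribution_queue "
    "WHERE database = currentDatabase()"
)
# Prints the pending distributed-send queue for tables in the current database.
print(subprocess.check_output(["clickhouse-client", "--query", query]).decode())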
struct State { + State(const State&) = delete; + Context context; NamesAndTypesList columns{ {"column", std::make_shared()}, @@ -27,23 +29,24 @@ struct State {"create_time", std::make_shared()}, }; + static const State & instance() + { + static State state; + return state; + } + +private: explicit State() : context(getContext().context) { registerFunctions(); DatabasePtr database = std::make_shared("test", context); database->attachTable("table", StorageMemory::create(StorageID("test", "table"), ColumnsDescription{columns}, ConstraintsDescription{})); - context.makeGlobalContext(); DatabaseCatalog::instance().attachDatabase("test", database); context.setCurrentDatabase("test"); } }; -State getState() -{ - static State state; - return state; -} static void check(const std::string & query, const std::string & expected, const Context & context, const NamesAndTypesList & columns) { @@ -60,7 +63,7 @@ static void check(const std::string & query, const std::string & expected, const TEST(TransformQueryForExternalDatabase, InWithSingleElement) { - const State & state = getState(); + const State & state = State::instance(); check("SELECT column FROM test.table WHERE 1 IN (1)", R"(SELECT "column" FROM "test"."table" WHERE 1)", @@ -75,7 +78,7 @@ TEST(TransformQueryForExternalDatabase, InWithSingleElement) TEST(TransformQueryForExternalDatabase, Like) { - const State & state = getState(); + const State & state = State::instance(); check("SELECT column FROM test.table WHERE column LIKE '%hello%'", R"(SELECT "column" FROM "test"."table" WHERE "column" LIKE '%hello%')", @@ -87,7 +90,7 @@ TEST(TransformQueryForExternalDatabase, Like) TEST(TransformQueryForExternalDatabase, Substring) { - const State & state = getState(); + const State & state = State::instance(); check("SELECT column FROM test.table WHERE left(column, 10) = RIGHT(column, 10) AND SUBSTRING(column FROM 1 FOR 2) = 'Hello'", R"(SELECT "column" FROM "test"."table")", @@ -96,7 +99,7 @@ TEST(TransformQueryForExternalDatabase, Substring) TEST(TransformQueryForExternalDatabase, MultipleAndSubqueries) { - const State & state = getState(); + const State & state = State::instance(); check("SELECT column FROM test.table WHERE 1 = 1 AND toString(column) = '42' AND column = 42 AND left(column, 10) = RIGHT(column, 10) AND column IN (1, 42) AND SUBSTRING(column FROM 1 FOR 2) = 'Hello' AND column != 4", R"(SELECT "column" FROM "test"."table" WHERE 1 AND ("column" = 42) AND ("column" IN (1, 42)) AND ("column" != 4))", @@ -108,7 +111,7 @@ TEST(TransformQueryForExternalDatabase, MultipleAndSubqueries) TEST(TransformQueryForExternalDatabase, Issue7245) { - const State & state = getState(); + const State & state = State::instance(); check("select apply_id from test.table where apply_type = 2 and create_time > addDays(toDateTime('2019-01-01 01:02:03'),-7) and apply_status in (3,4)", R"(SELECT "apply_id", "apply_type", "apply_status", "create_time" FROM "test"."table" WHERE ("apply_type" = 2) AND ("create_time" > '2018-12-25 01:02:03') AND ("apply_status" IN (3, 4)))", diff --git a/src/Storages/tests/storage_log.cpp b/src/Storages/tests/storage_log.cpp deleted file mode 100644 index 1e1ab4c682f..00000000000 --- a/src/Storages/tests/storage_log.cpp +++ /dev/null @@ -1,113 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int main(int, char **) -try -{ - using namespace DB; - - const size_t rows = 10000000; - - /// create table with a pair of columns - - NamesAndTypesList 
names_and_types; - names_and_types.emplace_back("a", std::make_shared()); - names_and_types.emplace_back("b", std::make_shared()); - - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - context.setPath("./"); - - DiskPtr disk = std::make_unique("default", "./", 0); - StoragePtr table = StorageLog::create(disk, "table/", StorageID("test", "test"), ColumnsDescription{names_and_types}, ConstraintsDescription{}, 1048576); - - table->startup(); - - /// write into it - { - Block block; - - { - ColumnWithTypeAndName column; - column.name = "a"; - column.type = table->getColumns().getPhysical("a").type; - auto col = column.type->createColumn(); - ColumnUInt64::Container & vec = typeid_cast(*col).getData(); - - vec.resize(rows); - for (size_t i = 0; i < rows; ++i) - vec[i] = i; - - column.column = std::move(col); - block.insert(column); - } - - { - ColumnWithTypeAndName column; - column.name = "b"; - column.type = table->getColumns().getPhysical("b").type; - auto col = column.type->createColumn(); - ColumnUInt8::Container & vec = typeid_cast(*col).getData(); - - vec.resize(rows); - for (size_t i = 0; i < rows; ++i) - vec[i] = i * 2; - - column.column = std::move(col); - block.insert(column); - } - - BlockOutputStreamPtr out = table->write({}, context); - out->write(block); - } - - /// read from it - { - Names column_names; - column_names.push_back("a"); - column_names.push_back("b"); - - QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); - - BlockInputStreamPtr in = std::make_shared(std::move(table->read(column_names, {}, context, stage, 8192, 1)[0])); - - Block sample; - { - ColumnWithTypeAndName col; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - ColumnWithTypeAndName col; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - - WriteBufferFromOStream out_buf(std::cout); - - LimitBlockInputStream in_limit(in, 10, 0); - BlockOutputStreamPtr output = FormatFactory::instance().getOutput("TabSeparated", out_buf, sample, context); - - copyData(in_limit, *output); - } - - return 0; -} -catch (const DB::Exception & e) -{ - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; -} diff --git a/src/Storages/tests/system_numbers.cpp b/src/Storages/tests/system_numbers.cpp deleted file mode 100644 index 6955c90b74e..00000000000 --- a/src/Storages/tests/system_numbers.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -int main(int, char **) -try -{ - using namespace DB; - - StoragePtr table = StorageSystemNumbers::create(StorageID("test", "numbers"), false); - - Names column_names; - column_names.push_back("number"); - - Block sample; - ColumnWithTypeAndName col; - col.type = std::make_shared(); - sample.insert(std::move(col)); - - WriteBufferFromOStream out_buf(std::cout); - - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); - - auto stream = std::make_shared(std::move(table->read(column_names, {}, context, stage, 10, 1)[0])); - LimitBlockInputStream input(stream, 10, 96); - BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out_buf, sample, context); - - copyData(input, *out); - - return 0; -} -catch (const DB::Exception & e) 
-{ - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; -} diff --git a/src/Storages/tests/test_alter_distributed.sql b/src/Storages/tests/test_alter_distributed.sql deleted file mode 100644 index 0578d340276..00000000000 --- a/src/Storages/tests/test_alter_distributed.sql +++ /dev/null @@ -1,28 +0,0 @@ -create database if not exists test; - -drop table if exists test.merge_distributed; -drop table if exists test.merge_distributed1; - -create table test.merge_distributed1 ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); -insert into test.merge_distributed1 values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); - -create table test.merge_distributed ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = Distributed(self, test, merge_distributed1); - -alter table test.merge_distributed1 add column dummy String after CounterID; -alter table test.merge_distributed add column dummy String after CounterID; - -describe table test.merge_distributed; -show create table test.merge_distributed; - -insert into test.merge_distributed1 values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); -select CounterID, dummy from test.merge_distributed where dummy <> '' limit 10; - -alter table test.merge_distributed drop column dummy; - -describe table test.merge_distributed; -show create table test.merge_distributed; - ---error: should fall, because there is no `dummy1` column -alter table test.merge_distributed add column dummy1 String after CounterID; -select CounterID, dummy1 from test.merge_distributed where dummy1 <> '' limit 10; - diff --git a/src/Storages/tests/test_alter_merge.sql b/src/Storages/tests/test_alter_merge.sql deleted file mode 100644 index 252577ddc37..00000000000 --- a/src/Storages/tests/test_alter_merge.sql +++ /dev/null @@ -1,35 +0,0 @@ -create database if not exists test; - -drop table if exists test.merge; -drop table if exists test.merge1; -drop table if exists test.merge2; - -create table test.merge1 ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); -insert into test.merge1 values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); - -create table test.merge2 ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); -insert into test.merge2 values (2, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); - -create table test.merge ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = Merge(test, 'merge\[0-9\]'); - -alter table test.merge1 add column dummy String after CounterID; -alter table test.merge2 add column dummy String after CounterID; -alter table test.merge add column dummy String after CounterID; - -describe table test.merge; -show create table test.merge; - -insert into test.merge1 values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); - -select 
CounterID, dummy from test.merge where dummy <> '' limit 10; - - -alter table test.merge drop column dummy; - -describe table test.merge; -show create table test.merge; - ---error: must correctly fall into the alter -alter table test.merge add column dummy1 String after CounterID; -select CounterID, dummy1 from test.merge where dummy1 <> '' limit 10; - diff --git a/src/Storages/tests/test_alter_merge_tree.sql b/src/Storages/tests/test_alter_merge_tree.sql deleted file mode 100644 index 5ac361acc5c..00000000000 --- a/src/Storages/tests/test_alter_merge_tree.sql +++ /dev/null @@ -1,17 +0,0 @@ -create database if not exists test; - -drop table if exists test.merge_tree; - -create table test.merge_tree ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); - -insert into test.merge_tree values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3) -alter table test.merge_tree add column dummy String after CounterID; -describe table test.merge_tree; - -insert into test.merge_tree values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3) - -select CounterID, dummy from test.merge_tree where dummy <> '' limit 10; - -alter table test.merge_tree drop column dummy; - -describe table test.merge_tree; diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 7e36e4145eb..18f62504e1f 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -121,6 +121,7 @@ SRCS( System/StorageSystemQuotasUsage.cpp System/StorageSystemReplicas.cpp System/StorageSystemReplicationQueue.cpp + System/StorageSystemDistributionQueue.cpp System/StorageSystemRoleGrants.cpp System/StorageSystemRoles.cpp System/StorageSystemRowPolicies.cpp @@ -164,6 +165,7 @@ SRCS( StorageMySQL.cpp StorageNull.cpp StorageReplicatedMergeTree.cpp + StorageS3Settings.cpp StorageSet.cpp StorageStripeLog.cpp StorageTinyLog.cpp diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index 3b3db1c2510..548db38515c 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -21,6 +21,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; } @@ -44,6 +45,18 @@ StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & ast_function, " structure, [random_seed, max_string_length, max_array_length].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + // All the arguments must be literals. + for (const auto & arg : args) + { + if (!arg->as()) + { + throw Exception(fmt::format( + "All arguments of table function '{}' must be literals. 
" + "Got '{}' instead", getName(), arg->formatForErrorMessage()), + ErrorCodes::BAD_ARGUMENTS); + } + } + /// Parsing first argument as table structure and creating a sample block std::string structure = args[0]->as().value.safeGet(); diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 4e166b10d8f..5ecd978146c 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -25,6 +25,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -75,6 +76,13 @@ StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, const C ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// Parsing first argument as table structure and creating a sample block + if (!args[0]->as()) + { + throw Exception(fmt::format( + "The first argument of table function '{}' must be a literal. " + "Got '{}' instead", getName(), args[0]->formatForErrorMessage()), + ErrorCodes::BAD_ARGUMENTS); + } std::string structure = args[0]->as().value.safeGet(); ColumnsDescription columns = parseColumnsListFromString(structure, context); diff --git a/src/TableFunctions/parseColumnsListForTableFunction.cpp b/src/TableFunctions/parseColumnsListForTableFunction.cpp index 8eea3edf9bd..5221d96e086 100644 --- a/src/TableFunctions/parseColumnsListForTableFunction.cpp +++ b/src/TableFunctions/parseColumnsListForTableFunction.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -11,27 +12,20 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int SYNTAX_ERROR; } ColumnsDescription parseColumnsListFromString(const std::string & structure, const Context & context) { - Expected expected; - - Tokens tokens(structure.c_str(), structure.c_str() + structure.size()); - IParser::Pos token_iterator(tokens, context.getSettingsRef().max_parser_depth); - ParserColumnDeclarationList parser; - ASTPtr columns_list_raw; + const Settings & settings = context.getSettingsRef(); - if (!parser.parse(token_iterator, columns_list_raw, expected)) - throw Exception("Cannot parse columns declaration list.", ErrorCodes::SYNTAX_ERROR); + ASTPtr columns_list_raw = parseQuery(parser, structure, "columns declaration list", settings.max_query_size, settings.max_parser_depth); auto * columns_list = dynamic_cast(columns_list_raw.get()); if (!columns_list) throw Exception("Could not cast AST to ASTExpressionList", ErrorCodes::LOGICAL_ERROR); - return InterpreterCreateQuery::getColumnsDescription(*columns_list, context, !context.getSettingsRef().allow_suspicious_codecs); + return InterpreterCreateQuery::getColumnsDescription(*columns_list, context, !settings.allow_suspicious_codecs); } } diff --git a/tests/integration/test_host_ip_change/test.py b/tests/integration/test_host_ip_change/test.py index 1b6d4365ac9..ac35478277c 100644 --- a/tests/integration/test_host_ip_change/test.py +++ b/tests/integration/test_host_ip_change/test.py @@ -76,8 +76,6 @@ node3 = cluster.add_instance('node3', main_configs=['configs/listen_host.xml'], with_zookeeper=True, ipv6_address='2001:3984:3989::1:1113') node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml', 'configs/listen_host.xml', 'configs/dns_update_short.xml'], with_zookeeper=True, ipv6_address='2001:3984:3989::1:1114') -node5 = cluster.add_instance('node5', main_configs=['configs/listen_host.xml', 'configs/dns_update_short.xml'], - 
user_configs=['configs/users_with_hostname.xml'], ipv6_address='2001:3984:3989::1:1115') @pytest.fixture(scope="module") def cluster_with_dns_cache_update(): @@ -142,24 +140,39 @@ def test_dns_cache_update(cluster_with_dns_cache_update): assert TSV(node4.query("SELECT DISTINCT host_name, host_address FROM system.clusters WHERE cluster='lost_host_cluster'")) == TSV("lost_host\t127.0.0.1\n") assert TSV(node4.query("SELECT hostName()")) == TSV("node4") -def test_user_access_ip_change(cluster_with_dns_cache_update): - assert node3.query("SELECT * FROM remote('node5', 'system', 'one')") == "0\n" - assert node4.query("SELECT * FROM remote('node5', 'system', 'one')") == "0\n" +# Check SYSTEM DROP DNS CACHE on node5 and background cache update on node6 +node5 = cluster.add_instance('node5', main_configs=['configs/listen_host.xml', 'configs/dns_update_long.xml'], + user_configs=['configs/users_with_hostname.xml'], ipv6_address='2001:3984:3989::1:1115') +node6 = cluster.add_instance('node6', main_configs=['configs/listen_host.xml', 'configs/dns_update_short.xml'], + user_configs=['configs/users_with_hostname.xml'], ipv6_address='2001:3984:3989::1:1116') - set_hosts(node5, ['127.255.255.255 node3', '2001:3984:3989::1:8884 unknown_host']) +@pytest.mark.parametrize("node", [node5, node6]) +def test_user_access_ip_change(cluster_with_dns_cache_update, node): + node_name = node.name + node_num = node.name[-1] + # getaddrinfo(...) may hang for a long time without these options + node.exec_in_container(['bash', '-c', 'echo -e "options timeout:1\noptions attempts:2" >> /etc/resolv.conf'], privileged=True, user='root') - cluster.restart_instance_with_ip_change(node3, "2001:3984:3989::1:8883") - cluster.restart_instance_with_ip_change(node4, "2001:3984:3989::1:8884") + assert node3.query("SELECT * FROM remote('{}', 'system', 'one')".format(node_name)) == "0\n" + assert node4.query("SELECT * FROM remote('{}', 'system', 'one')".format(node_name)) == "0\n" + + set_hosts(node, ['127.255.255.255 node3', '2001:3984:3989::1:88{}4 unknown_host'.format(node_num)]) + + cluster.restart_instance_with_ip_change(node3, "2001:3984:3989::1:88{}3".format(node_num)) + cluster.restart_instance_with_ip_change(node4, "2001:3984:3989::1:88{}4".format(node_num)) with pytest.raises(QueryRuntimeException): - node3.query("SELECT * FROM remote('node5', 'system', 'one')") + node3.query("SELECT * FROM remote('{}', 'system', 'one')".format(node_name)) with pytest.raises(QueryRuntimeException): - node4.query("SELECT * FROM remote('node5', 'system', 'one')") + node4.query("SELECT * FROM remote('{}', 'system', 'one')".format(node_name)) # now wrong addresses are cached - set_hosts(node5, []) - # client is not allowed to connect, so execute it directly in container to send query from localhost - node5.exec_in_container(['bash', '-c', 'clickhouse client -q "SYSTEM DROP DNS CACHE"'], privileged=True, user='root') + set_hosts(node, []) + retry_count = 60 + if node_name == 'node5': + # client is not allowed to connect, so execute it directly in container to send query from localhost + node.exec_in_container(['bash', '-c', 'clickhouse client -q "SYSTEM DROP DNS CACHE"'], privileged=True, user='root') + retry_count = 1 - assert node3.query("SELECT * FROM remote('node5', 'system', 'one')") == "0\n" - assert node4.query("SELECT * FROM remote('node5', 'system', 'one')") == "0\n" + assert_eq_with_retry(node3, "SELECT * FROM remote('{}', 'system', 'one')".format(node_name), "0", retry_count=retry_count, sleep_time=1) + assert_eq_with_retry(node4, "SELECT 

* FROM remote('{}', 'system', 'one')".format(node_name), "0", retry_count=retry_count, sleep_time=1) diff --git a/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml b/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml index 61bc5af1f7d..bfcb1c0977b 100644 --- a/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml +++ b/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml @@ -1,6 +1,6 @@ - + true @@ -12,7 +12,21 @@ 9000 - + + + + + false + + n2 + 9000 + + + n1 + 9000 + + + diff --git a/tests/integration/test_insert_distributed_load_balancing/test.py b/tests/integration/test_insert_distributed_load_balancing/test.py index 49c8a89161f..52ee3ba1c4a 100644 --- a/tests/integration/test_insert_distributed_load_balancing/test.py +++ b/tests/integration/test_insert_distributed_load_balancing/test.py @@ -11,6 +11,11 @@ cluster = ClickHouseCluster(__file__) n1 = cluster.add_instance('n1', main_configs=['configs/remote_servers.xml']) n2 = cluster.add_instance('n2', main_configs=['configs/remote_servers.xml']) +params = pytest.mark.parametrize('cluster,q', [ + ('internal_replication', 0), + ('no_internal_replication', 1), +]) + @pytest.fixture(scope='module', autouse=True) def start_cluster(): try: @@ -19,7 +24,7 @@ def start_cluster(): finally: cluster.shutdown() -def create_tables(): +def create_tables(cluster): n1.query('DROP TABLE IF EXISTS data') n2.query('DROP TABLE IF EXISTS data') n1.query('DROP TABLE IF EXISTS dist') @@ -29,39 +34,44 @@ def create_tables(): n1.query(""" CREATE TABLE dist AS data Engine=Distributed( - integration_test_cluster, + {cluster}, currentDatabase(), data, rand() ) - """) + """.format(cluster=cluster)) -def insert_data(**settings): - create_tables() +def insert_data(cluster, **settings): + create_tables(cluster) n1.query('INSERT INTO dist SELECT * FROM numbers(10)', settings=settings) n1.query('SYSTEM FLUSH DISTRIBUTED dist') -def test_prefer_localhost_replica_1(): - insert_data() +@params +def test_prefer_localhost_replica_1(cluster, q): + insert_data(cluster) assert int(n1.query('SELECT count() FROM data')) == 10 - assert int(n2.query('SELECT count() FROM data')) == 0 + assert int(n2.query('SELECT count() FROM data')) == 10*q -def test_prefer_localhost_replica_1_load_balancing_in_order(): - insert_data(load_balancing='in_order') +@params +def test_prefer_localhost_replica_1_load_balancing_in_order(cluster, q): + insert_data(cluster, load_balancing='in_order') assert int(n1.query('SELECT count() FROM data')) == 10 - assert int(n2.query('SELECT count() FROM data')) == 0 + assert int(n2.query('SELECT count() FROM data')) == 10*q -def test_prefer_localhost_replica_0_load_balancing_nearest_hostname(): - insert_data(load_balancing='nearest_hostname', prefer_localhost_replica=0) +@params +def test_prefer_localhost_replica_0_load_balancing_nearest_hostname(cluster, q): + insert_data(cluster, load_balancing='nearest_hostname', prefer_localhost_replica=0) assert int(n1.query('SELECT count() FROM data')) == 10 - assert int(n2.query('SELECT count() FROM data')) == 0 + assert int(n2.query('SELECT count() FROM data')) == 10*q -def test_prefer_localhost_replica_0_load_balancing_in_order(): - insert_data(load_balancing='in_order', prefer_localhost_replica=0) - assert int(n1.query('SELECT count() FROM data')) == 0 +@params +def test_prefer_localhost_replica_0_load_balancing_in_order(cluster, q): + insert_data(cluster, load_balancing='in_order', 
prefer_localhost_replica=0) + assert int(n1.query('SELECT count() FROM data')) == 10*q assert int(n2.query('SELECT count() FROM data')) == 10 -def test_prefer_localhost_replica_0_load_balancing_in_order_sync(): - insert_data(load_balancing='in_order', prefer_localhost_replica=0, insert_distributed_sync=1) - assert int(n1.query('SELECT count() FROM data')) == 0 +@params +def test_prefer_localhost_replica_0_load_balancing_in_order_sync(cluster, q): + insert_data(cluster, load_balancing='in_order', prefer_localhost_replica=0, insert_distributed_sync=1) + assert int(n1.query('SELECT count() FROM data')) == 10*q assert int(n2.query('SELECT count() FROM data')) == 10 diff --git a/tests/integration/test_s3_with_proxy/proxy-resolver/entrypoint.sh b/tests/integration/test_s3_with_proxy/proxy-resolver/entrypoint.sh deleted file mode 100644 index e456be666a9..00000000000 --- a/tests/integration/test_s3_with_proxy/proxy-resolver/entrypoint.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -pip install bottle -python resolver.py diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py index 11176b19f0c..dc326b719bf 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_with_proxy/test.py @@ -14,9 +14,7 @@ def run_resolver(cluster): current_dir = os.path.dirname(__file__) cluster.copy_file_to_container(container_id, os.path.join(current_dir, "proxy-resolver", "resolver.py"), "resolver.py") - cluster.copy_file_to_container(container_id, os.path.join(current_dir, "proxy-resolver", "entrypoint.sh"), - "entrypoint.sh") - cluster.exec_in_container(container_id, ["/bin/bash", "entrypoint.sh"], detach=True) + cluster.exec_in_container(container_id, ["python", "resolver.py"], detach=True) @pytest.fixture(scope="module") diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index d2a2532bb9a..2a1b42f8e0e 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -233,6 +233,83 @@ def test_kafka_settings_new_syntax(kafka_cluster): members = describe_consumer_group('new') assert members[0]['client_id'] == u'instance test 1234' + +@pytest.mark.timeout(180) +def test_kafka_issue11308(kafka_cluster): + # Check that matview does respect Kafka SETTINGS + kafka_produce('issue11308', ['{"t": 123, "e": {"x": "woof"} }', '{"t": 123, "e": {"x": "woof"} }', '{"t": 124, "e": {"x": "test"} }']) + + instance.query(''' + CREATE TABLE test.persistent_kafka ( + time UInt64, + some_string String + ) + ENGINE = MergeTree() + ORDER BY time; + + CREATE TABLE test.kafka (t UInt64, `e.x` String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'issue11308', + kafka_group_name = 'issue11308', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n', + kafka_flush_interval_ms=1000, + input_format_import_nested_json = 1; + + CREATE MATERIALIZED VIEW test.persistent_kafka_mv TO test.persistent_kafka AS + SELECT + `t` AS `time`, + `e.x` AS `some_string` + FROM test.kafka; + ''') + + time.sleep(9) + + result = instance.query('SELECT * FROM test.persistent_kafka ORDER BY time;') + + instance.query(''' + DROP TABLE test.persistent_kafka; + DROP TABLE test.persistent_kafka_mv; + ''') + + expected = '''\ +123 woof +123 woof +124 test +''' + assert TSV(result) == TSV(expected) + + +@pytest.mark.timeout(180) +def test_kafka_issue4116(kafka_cluster): + # Check that format_csv_delimiter parameter works now - as part of all 
available format settings. + kafka_produce('issue4116', ['1|foo', '2|bar', '42|answer','100|multi\n101|row\n103|message']) + + instance.query(''' + CREATE TABLE test.kafka (a UInt64, b String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'issue4116', + kafka_group_name = 'issue4116', + kafka_format = 'CSV', + kafka_row_delimiter = '\\n', + format_csv_delimiter = '|'; + ''') + + result = instance.query('SELECT * FROM test.kafka ORDER BY a;') + + expected = '''\ +1 foo +2 bar +42 answer +100 multi +101 row +103 message +''' + assert TSV(result) == TSV(expected) + + @pytest.mark.timeout(180) def test_kafka_consumer_hang(kafka_cluster): diff --git a/tests/integration/test_storage_s3/configs/defaultS3.xml b/tests/integration/test_storage_s3/configs/defaultS3.xml new file mode 100644 index 00000000000..26dc52f9e8f --- /dev/null +++ b/tests/integration/test_storage_s3/configs/defaultS3.xml @@ -0,0 +1,8 @@ + + + + http://resolver:8080 +
Authorization: Bearer TOKEN
+
+
+
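The defaultS3.xml added above (its XML markup did not survive extraction) appears to point the default S3 settings at http://resolver:8080 and to attach an "Authorization: Bearer TOKEN" header, which is exactly what the bottle-based mock in the next hunk checks for. A minimal sketch of exercising that mock directly, not part of the patch — it assumes the script runs inside the integration-test network where the resolver hostname resolves, and the bucket/file names are placeholders:

import urllib.request

# The mock replies "1, 2, 3" only when the exact bearer-token header is present;
# any other request is rejected with 403.
request = urllib.request.Request(
    "http://resolver:8080/some-bucket/test.csv",
    headers={"Authorization": "Bearer TOKEN"},
)
with urllib.request.urlopen(request) as response:
    print(response.status, response.read().decode())  # expected: 200 1, 2, 3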
diff --git a/tests/integration/test_storage_s3/s3_mock/mock_s3.py b/tests/integration/test_storage_s3/s3_mock/mock_s3.py new file mode 100644 index 00000000000..35b477d6b10 --- /dev/null +++ b/tests/integration/test_storage_s3/s3_mock/mock_s3.py @@ -0,0 +1,17 @@ +from bottle import abort, route, run, request + + +@route('/<_bucket>/<_path>') +def server(_bucket, _path): + for name in request.headers: + if name == 'Authorization' and request.headers[name] == u'Bearer TOKEN': + return '1, 2, 3' + abort(403) + + +@route('/') +def ping(): + return 'OK' + + +run(host='0.0.0.0', port=8080) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 9f124507e14..b25e5907e62 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -2,6 +2,7 @@ import json import logging import random import threading +import os import pytest @@ -9,7 +10,6 @@ from helpers.cluster import ClickHouseCluster, ClickHouseInstance import helpers.client - logging.getLogger().setLevel(logging.INFO) logging.getLogger().addHandler(logging.StreamHandler()) @@ -82,14 +82,16 @@ def get_nginx_access_logs(): def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("restricted_dummy", main_configs=["configs/config_for_test_remote_host_filter.xml"], with_minio=True) - cluster.add_instance("dummy", with_minio=True) + cluster.add_instance("restricted_dummy", main_configs=["configs/config_for_test_remote_host_filter.xml"], + with_minio=True) + cluster.add_instance("dummy", with_minio=True, main_configs=["configs/defaultS3.xml"]) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") prepare_s3_bucket(cluster) logging.info("S3 bucket created") + run_s3_mock(cluster) yield cluster finally: @@ -199,14 +201,15 @@ def test_put_get_with_globs(cluster): for j in range(10): path = "{}_{}/{}.csv".format(i, random.choice(['a', 'b', 'c', 'd']), j) max_path = max(path, max_path) - values = "({},{},{})".format(i, j, i+j) + values = "({},{},{})".format(i, j, i + j) query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( cluster.minio_host, cluster.minio_port, bucket, path, table_format, values) run_query(instance, query) query = "select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from s3('http://{}:{}/{}/*_{{a,b,c,d}}/%3f.csv', 'CSV', '{}')".format( cluster.minio_redirect_host, cluster.minio_redirect_port, bucket, table_format) - assert run_query(instance, query).splitlines() == ["450\t450\t900\t0.csv\t{bucket}/{max_path}".format(bucket=bucket, max_path=max_path)] + assert run_query(instance, query).splitlines() == [ + "450\t450\t900\t0.csv\t{bucket}/{max_path}".format(bucket=bucket, max_path=max_path)] # Test multipart put. 
@@ -307,3 +310,29 @@ def test_s3_glob_scheherazade(cluster): query = "select count(), sum(column1), sum(column2), sum(column3) from s3('http://{}:{}/{}/night_*/tale.csv', 'CSV', '{}')".format( cluster.minio_redirect_host, cluster.minio_redirect_port, bucket, table_format) assert run_query(instance, query).splitlines() == ["1001\t1001\t1001\t1001"] + + +def run_s3_mock(cluster): + logging.info("Starting s3 mock") + container_id = cluster.get_container_id('resolver') + current_dir = os.path.dirname(__file__) + cluster.copy_file_to_container(container_id, os.path.join(current_dir, "s3_mock", "mock_s3.py"), "mock_s3.py") + cluster.exec_in_container(container_id, ["python", "mock_s3.py"], detach=True) + logging.info("S3 mock started") + + +# Test get values in CSV format with default settings. +def test_get_csv_default(cluster): + ping_response = cluster.exec_in_container(cluster.get_container_id('resolver'), ["curl", "-s", "http://resolver:8080"]) + assert ping_response == 'OK', 'Expected "OK", but got "{}"'.format(ping_response) + + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + filename = "test.csv" + get_query = "select * from s3('http://resolver:8080/{bucket}/{file}', 'CSV', '{table_format}')".format( + bucket=cluster.minio_restricted_bucket, + file=filename, + table_format=table_format) + + instance = cluster.instances["dummy"] # type: ClickHouseInstance + result = run_query(instance, get_query) + assert result == '1\t2\t3\n' diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index a4318df1658..26bd36b8cb6 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -61,7 +61,7 @@ def get_used_disks_for_table(node, table_name, partition=None): def check_used_disks_with_retry(node, table_name, expected_disks, retries): for _ in range(retries): used_disks = get_used_disks_for_table(node, table_name) - if set(used_disks) == expected_disks: + if set(used_disks).issubset(expected_disks): return True time.sleep(0.5) return False @@ -830,7 +830,8 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): def optimize_table(num): for i in range(num): try: # optimize may throw after concurrent alter - node1.query("OPTIMIZE TABLE {} FINAL".format(name)) + node1.query("OPTIMIZE TABLE {} FINAL".format(name), settings={'optimize_throw_if_noop': '1'}) + break except: pass @@ -903,3 +904,93 @@ def test_double_move_while_select(started_cluster, name, positive): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + + +@pytest.mark.parametrize("name,engine,positive", [ + ("mt_test_alter_with_merge_do_not_work","MergeTree()",0), + ("replicated_mt_test_alter_with_merge_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_alter_with_merge_do_not_work', '1')",0), + ("mt_test_alter_with_merge_work","MergeTree()",1), + ("replicated_mt_test_alter_with_merge_work","ReplicatedMergeTree('/clickhouse/replicated_test_alter_with_merge_work', '1')",1), +]) +def test_alter_with_merge_work(started_cluster, name, engine, positive): + """Copyright 2019, Altinity LTD +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.""" + """Check that TTL expressions are re-evaluated for + existing parts after ALTER command changes TTL expressions + and parts are merged. + """ + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime + ) ENGINE = {engine} + ORDER BY tuple() + TTL d1 + INTERVAL 3000 SECOND TO DISK 'jbod2', + d1 + INTERVAL 6000 SECOND TO VOLUME 'external' + SETTINGS storage_policy='jbods_with_external', merge_with_ttl_timeout=0 + """.format(name=name, engine=engine)) + + + def optimize_table(num): + for i in range(num): + try: # optimize may throw after concurrent alter + node1.query("OPTIMIZE TABLE {} FINAL".format(name), settings={'optimize_throw_if_noop': '1'}) + break + except: + pass + + for p in range(3): + data = [] # 6MB in total + now = time.time() + for i in range(2): + s1 = get_random_string(1024 * 1024) # 1MB + d1 = now - 1 if positive else now + 300 + data.append("('{}', toDateTime({}))".format(s1, d1)) + values = ",".join(data) + node1.query("INSERT INTO {name} (s1, d1) VALUES {values}".format(name=name, values=values)) + + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {"jbod1", "jbod2"} + + node1.query("SELECT count() FROM {name}".format(name=name)).splitlines() == ["6"] + + node1.query(""" + ALTER TABLE {name} MODIFY + TTL d1 + INTERVAL 0 SECOND TO DISK 'jbod2', + d1 + INTERVAL 5 SECOND TO VOLUME 'external', + d1 + INTERVAL 10 SECOND DELETE + """.format(name=name)) + + optimize_table(20) + + assert node1.query("SELECT count() FROM system.parts WHERE table = '{name}' AND active = 1".format(name=name)) == "1\n" + + time.sleep(5) + + optimize_table(20) + + if positive: + assert check_used_disks_with_retry(node1, name, set(["external"]), 50) + else: + assert check_used_disks_with_retry(node1, name, set(["jbod1", "jbod2"]), 50) + + time.sleep(5) + + optimize_table(20) + + if positive: + assert node1.query("SELECT count() FROM {name}".format(name=name)) == "0\n" + else: + assert node1.query("SELECT count() FROM {name}".format(name=name)) == "6\n" + + finally: + node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 29169ad3c0e..a458db07a23 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -78,13 +78,13 @@ def test_ttl_many_columns(started_cluster): time.sleep(1) # sleep to allow use ttl merge selector for second time node1.query("OPTIMIZE TABLE test_ttl_2 FINAL", timeout=5) - + node2.query("SYSTEM SYNC REPLICA test_ttl_2", timeout=5) expected = "1\t0\t0\t0\t0\n6\t7\t8\t9\t10\n" assert TSV(node1.query("SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id")) == TSV(expected) assert TSV(node2.query("SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id")) == TSV(expected) - + @pytest.mark.parametrize("delete_suffix", [ "", @@ -167,3 +167,67 @@ def test_ttl_double_delete_rule_returns_error(started_cluster): pass except: assert False + +@pytest.mark.parametrize("name,engine", [ + ("test_ttl_alter_delete", "MergeTree()"), + 
("test_replicated_ttl_alter_delete", "ReplicatedMergeTree('/clickhouse/test_replicated_ttl_alter_delete', '1')"), +]) +def test_ttl_alter_delete(started_cluster, name, engine): + """Copyright 2019, Altinity LTD +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.""" + """Check compatibility with old TTL delete expressions to make sure + that: + * alter modify of column's TTL delete expression works + * alter to add new columns works + * alter modify to add TTL delete expression to a a new column works + for a table that has TTL delete expression defined but + no explicit storage policy assigned. + """ + drop_table([node1], name) + + def optimize_with_retry(retry=20): + for i in range(retry): + try: + node1.query("OPTIMIZE TABLE {name} FINAL".format(name=name), settings={"optimize_throw_if_noop": "1"}) + break + except: + time.sleep(0.5) + node1.query( + """ + CREATE TABLE {name} ( + s1 String, + d1 DateTime + ) ENGINE = {engine} + ORDER BY tuple() + TTL d1 + INTERVAL 1 DAY DELETE + """.format(name=name, engine=engine)) + + node1.query("""ALTER TABLE {name} MODIFY COLUMN s1 String TTL d1 + INTERVAL 1 SECOND""".format(name=name)) + node1.query("""ALTER TABLE {name} ADD COLUMN b1 Int32""".format(name=name)) + + node1.query("""INSERT INTO {name} (s1, b1, d1) VALUES ('hello1', 1, toDateTime({time}))""".format(name=name, time=time.time())) + node1.query("""INSERT INTO {name} (s1, b1, d1) VALUES ('hello2', 2, toDateTime({time}))""".format(name=name, time=time.time() + 360)) + + time.sleep(1) + + optimize_with_retry() + r = node1.query("SELECT s1, b1 FROM {name} ORDER BY b1, s1".format(name=name)).splitlines() + assert r == ["\t1", "hello2\t2"] + + node1.query("""ALTER TABLE {name} MODIFY COLUMN b1 Int32 TTL d1""".format(name=name)) + node1.query("""INSERT INTO {name} (s1, b1, d1) VALUES ('hello3', 3, toDateTime({time}))""".format(name=name, time=time.time())) + + time.sleep(1) + + optimize_with_retry() + + r = node1.query("SELECT s1, b1 FROM {name} ORDER BY b1, s1".format(name=name)).splitlines() + assert r == ["\t0", "\t0", "hello2\t2"] diff --git a/tests/performance/aggregating_merge_tree.xml b/tests/performance/aggregating_merge_tree.xml new file mode 100644 index 00000000000..d658fd705bb --- /dev/null +++ b/tests/performance/aggregating_merge_tree.xml @@ -0,0 +1,30 @@ + + + CREATE TABLE test( + t UInt64, + q1 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64), + q2 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64), + q3 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64), + q4 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64), + q5 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64) + ) ENGINE=SummingMergeTree() + ORDER BY t + + + + INSERT INTO test + SELECT + number / 10 as t, + quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q1, + quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q2, + quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q3, + quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q4, + 
quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q5 + FROM numbers(1000 * 1000) + GROUP BY t + + + OPTIMIZE TABLE test FINAL + + DROP TABLE test + diff --git a/tests/performance/aggregation_in_order.xml b/tests/performance/aggregation_in_order.xml new file mode 100644 index 00000000000..6e58865dab4 --- /dev/null +++ b/tests/performance/aggregation_in_order.xml @@ -0,0 +1,23 @@ + + + hits_10m_single + hits_100m_single + + + 1 + + + + table + + hits_10m_single + hits_100m_single + + + + + SELECT avg(length(URL)) as x from hits_100m_single GROUP BY CounterID FORMAT Null + SELECT avg(length(URL)) as x from {table} GROUP BY CounterID, EventDate FORMAT Null + SELECT avg(length(URL)) as x from hits_10m_single GROUP BY CounterID, EventDate, intHash32(UserID) FORMAT Null + + diff --git a/tests/queries/0_stateless/00398_url_functions.reference b/tests/queries/0_stateless/00398_url_functions.reference index acb605597d3..c926240b4f7 100644 --- a/tests/queries/0_stateless/00398_url_functions.reference +++ b/tests/queries/0_stateless/00398_url_functions.reference @@ -16,6 +16,17 @@ www.example.com example.com example.com example.com +====NETLOC==== +paul@www.example.com:80 +127.0.0.1:443 +127.0.0.1:443 +example.ru +example.ru +paul:zozo@example.ru +paul:zozo@example.ru +www.example.com +www.example.com +example.com ====DOMAIN==== com diff --git a/tests/queries/0_stateless/00398_url_functions.sql b/tests/queries/0_stateless/00398_url_functions.sql index d301cac5b15..c689844d08d 100644 --- a/tests/queries/0_stateless/00398_url_functions.sql +++ b/tests/queries/0_stateless/00398_url_functions.sql @@ -18,6 +18,17 @@ SELECT domain('example.com') as Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; +SELECT '====NETLOC===='; +SELECT netloc('http://paul@www.example.com:80/') AS Netloc; +SELECT netloc('http://127.0.0.1:443/') AS Netloc; +SELECT netloc('http://127.0.0.1:443') AS Netloc; +SELECT netloc('svn+ssh://example.ru/?q=hello%20world') AS Netloc; +SELECT netloc('svn+ssh://example.ru/?q=hello%20world') AS Netloc; +SELECT netloc('svn+ssh://paul:zozo@example.ru/?q=hello%20world') AS Netloc; +SELECT netloc('svn+ssh://paul:zozo@example.ru/?q=hello%20world') AS Netloc; +SELECT netloc('//www.example.com') AS Netloc; +SELECT netloc('www.example.com') as Netloc; +SELECT netloc('example.com') as Netloc; SELECT '====DOMAIN===='; SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain; diff --git a/tests/queries/0_stateless/00500_point_in_polygon_nan.reference b/tests/queries/0_stateless/00500_point_in_polygon_nan.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/00500_point_in_polygon_nan.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/00500_point_in_polygon_nan.sql b/tests/queries/0_stateless/00500_point_in_polygon_nan.sql new file mode 100644 index 00000000000..37ed8dbeded --- /dev/null +++ b/tests/queries/0_stateless/00500_point_in_polygon_nan.sql @@ -0,0 +1 @@ +SELECT pointInPolygon((nan, 10.000100135803223), [(39.83154, 21.41527), (2., 1000.0001220703125), (39.90033, 21.37195), (1.000100016593933, 10.000100135803223), (39.83051, 21.42553), (39.82898, 21.41382), (39.83043, 21.41432), (39.83154, 21.41527)]); diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.sql b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.sql index 96740d63778..c94c0f3c55b 100644 --- 
a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.sql +++ b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.sql @@ -43,7 +43,7 @@ DETACH TABLE test.summing_r2; ALTER TABLE test.summing_r1 ADD COLUMN t UInt32 AFTER z, MODIFY ORDER BY (x, y, t * t) SETTINGS replication_alter_partitions_sync = 2; -- { serverError 341 } ATTACH TABLE test.summing_r2; -SELECT sleep(1) Format Null; +SYSTEM SYNC REPLICA test.summing_r2; SELECT '*** Check SHOW CREATE TABLE after offline ALTER ***'; SHOW CREATE TABLE test.summing_r2; diff --git a/tests/queries/0_stateless/00972_geohashesInBox.reference b/tests/queries/0_stateless/00972_geohashesInBox.reference index e6844fa8394..92dab3cb04e 100644 --- a/tests/queries/0_stateless/00972_geohashesInBox.reference +++ b/tests/queries/0_stateless/00972_geohashesInBox.reference @@ -37,4 +37,6 @@ zooming ['s7w1z0gs3y0z','s7w1z0gs3y1p','s7w1z0gs3y1r','s7w1z0gs3y1x','s7w1z0gs3y2b','s7w1z0gs3y2c','s7w1z0gs3y2f','s7w1z0gs3y2g','s7w1z0gs3y2u','s7w1z0gs3y2v','s7w1z0gs3y30','s7w1z0gs3y31','s7w1z0gs3y32','s7w1z0gs3y33','s7w1z0gs3y34','s7w1z0gs3y35','s7w1z0gs3y36','s7w1z0gs3y37','s7w1z0gs3y38','s7w1z0gs3y39','s7w1z0gs3y3d','s7w1z0gs3y3e','s7w1z0gs3y3h','s7w1z0gs3y3j','s7w1z0gs3y3k','s7w1z0gs3y3m','s7w1z0gs3y3s','s7w1z0gs3y3t'] ['s7w1z0gs3y0z','s7w1z0gs3y1p','s7w1z0gs3y1r','s7w1z0gs3y1x','s7w1z0gs3y2b','s7w1z0gs3y2c','s7w1z0gs3y2f','s7w1z0gs3y2g','s7w1z0gs3y2u','s7w1z0gs3y2v','s7w1z0gs3y30','s7w1z0gs3y31','s7w1z0gs3y32','s7w1z0gs3y33','s7w1z0gs3y34','s7w1z0gs3y35','s7w1z0gs3y36','s7w1z0gs3y37','s7w1z0gs3y38','s7w1z0gs3y39','s7w1z0gs3y3d','s7w1z0gs3y3e','s7w1z0gs3y3h','s7w1z0gs3y3j','s7w1z0gs3y3k','s7w1z0gs3y3m','s7w1z0gs3y3s','s7w1z0gs3y3t'] ['s7w1z0gs3y0z','s7w1z0gs3y1p','s7w1z0gs3y1r','s7w1z0gs3y1x','s7w1z0gs3y2b','s7w1z0gs3y2c','s7w1z0gs3y2f','s7w1z0gs3y2g','s7w1z0gs3y2u','s7w1z0gs3y2v','s7w1z0gs3y30','s7w1z0gs3y31','s7w1z0gs3y32','s7w1z0gs3y33','s7w1z0gs3y34','s7w1z0gs3y35','s7w1z0gs3y36','s7w1z0gs3y37','s7w1z0gs3y38','s7w1z0gs3y39','s7w1z0gs3y3d','s7w1z0gs3y3e','s7w1z0gs3y3h','s7w1z0gs3y3j','s7w1z0gs3y3k','s7w1z0gs3y3m','s7w1z0gs3y3s','s7w1z0gs3y3t'] +input values are clamped to -90..90, -180..180 range +32768 errors diff --git a/tests/queries/0_stateless/00972_geohashesInBox.sql b/tests/queries/0_stateless/00972_geohashesInBox.sql index f382bf234ac..d52a03b055e 100644 --- a/tests/queries/0_stateless/00972_geohashesInBox.sql +++ b/tests/queries/0_stateless/00972_geohashesInBox.sql @@ -5,41 +5,46 @@ -- except for the cases when JS-version produces result outside of given region, -- typically at wrap points: poles, 0-latitude and 0-longitude. 
-select 'center'; +SELECT 'center'; SELECT arraySort(geohashesInBox(-1.0, -1.0, 1.0, 1.0, 3)); SELECT arraySort(geohashesInBox(-0.1, -0.1, 0.1, 0.1, 5)); SELECT arraySort(geohashesInBox(-0.01, -0.01, 0.01, 0.01, 5)); -select 'north pole'; +SELECT 'north pole'; SELECT arraySort(geohashesInBox(-180.0, 89.0, -179.0, 90.0, 3)); SELECT arraySort(geohashesInBox(-1.0, 89.0, 0.0, 90.0, 3)); SELECT arraySort(geohashesInBox(0.0, 89.0, 1.0, 90.0, 3)); SELECT arraySort(geohashesInBox(179.0, 89.0, 180.0, 90.0, 3)); -select 'south pole'; +SELECT 'south pole'; SELECT arraySort(geohashesInBox(-180.0, -90.0, -179.0, -89.0, 3)); SELECT arraySort(geohashesInBox(-1.0, -90.0, 0.0, -89.0, 3)); SELECT arraySort(geohashesInBox(0.0, -90.0, 1.0, -89.0, 3)); SELECT arraySort(geohashesInBox(179.0, -90.0, 180.0, -89.0, 3)); -select 'wrap point around equator'; +SELECT 'wrap point around equator'; SELECT arraySort(geohashesInBox(179.0, -1.0, 180.0, 0.0, 3)); SELECT arraySort(geohashesInBox(179.0, 0.0, 180.0, 1.0, 3)); SELECT arraySort(geohashesInBox(-180.0, -1.0, -179.0, 0.0, 3)); SELECT arraySort(geohashesInBox(-180.0, 0.0, -179.0, 1.0, 3)); -select 'arbitrary values in all 4 quarters'; +SELECT 'arbitrary values in all 4 quarters'; SELECT arraySort(geohashesInBox(98.36, 7.88, 98.37, 7.89, 6)); SELECT arraySort(geohashesInBox(53.8, 27.6, 53.9, 27.7, 5)); SELECT arraySort(geohashesInBox(-49.26, -25.38, -49.25, -25.37, 6)); SELECT arraySort(geohashesInBox(23.11, -82.37, 23.12, -82.36, 6)); -select 'small range always produces array of length 1'; -SELECT lon/5 - 180 as lon1, lat/5 - 90 as lat1, lon1 as lon2, lat1 as lat2, geohashesInBox(lon1, lat1, lon2, lat2, 1) as g FROM (SELECT arrayJoin(range(360*5)) as lon, arrayJoin(range(180*5)) as lat) WHERE length(g) != 1; -SELECT lon/5 - 40 as lon1, lat/5 - 20 as lat1, lon1 as lon2, lat1 as lat2, geohashesInBox(lon1, lat1, lon2, lat2, 12) as g FROM (SELECT arrayJoin(range(80*5)) as lon, arrayJoin(range(10*5)) as lat) WHERE length(g) != 1; -SELECT lon/5 - 40 as lon1, lat/5 - 20 as lat1, lon1 + 0.0000000001 as lon2, lat1 + 0.0000000001 as lat2, geohashesInBox(lon1, lat1, lon2, lat2, 1) as g FROM (SELECT arrayJoin(range(80*5)) as lon, arrayJoin(range(10*5)) as lat) WHERE length(g) != 1; +SELECT 'small range always produces array of length 1'; +SELECT lon/5 - 180 AS lon1, lat/5 - 90 AS lat1, lon1 AS lon2, lat1 AS lat2, geohashesInBox(lon1, lat1, lon2, lat2, 1) AS g +FROM (SELECT arrayJoin(range(360*5)) AS lon, arrayJoin(range(180*5)) AS lat) WHERE length(g) != 1; -select 'zooming'; +SELECT lon/5 - 40 AS lon1, lat/5 - 20 AS lat1, lon1 AS lon2, lat1 AS lat2, geohashesInBox(lon1, lat1, lon2, lat2, 12) AS g +FROM (SELECT arrayJoin(range(80*5)) AS lon, arrayJoin(range(10*5)) AS lat) WHERE length(g) != 1; + +SELECT lon/5 - 40 AS lon1, lat/5 - 20 AS lat1, lon1 + 0.0000000001 AS lon2, lat1 + 0.0000000001 AS lat2, geohashesInBox(lon1, lat1, lon2, lat2, 1) AS g +FROM (SELECT arrayJoin(range(80*5)) AS lon, arrayJoin(range(10*5)) AS lat) WHERE length(g) != 1; + +SELECT 'zooming'; SELECT arraySort(geohashesInBox(20.0, 20.0, 21.0, 21.0, 2)); SELECT arraySort(geohashesInBox(20.0, 20.0, 21.0, 21.0, 3)); SELECT arraySort(geohashesInBox(20.0, 20.0, 21.0, 21.0, 4)); @@ -56,8 +61,12 @@ SELECT arraySort(geohashesInBox(20.0, 20.0, 20.000001, 20.000001, 12)); SELECT arraySort(geohashesInBox(20.0, 20.0, 20.000001, 20.000001, 13)); SELECT arraySort(geohashesInBox(20.0, 20.0, 20.000001, 20.000001, 14)); -select 'errors'; +SELECT 'input values are clamped to -90..90, -180..180 range'; +SELECT 
length(geohashesInBox(-inf, -inf, inf, inf, 3)); + +SELECT 'errors'; SELECT geohashesInBox(); -- { serverError 42 } -- not enough arguments SELECT geohashesInBox(1, 2, 3, 4, 5); -- { serverError 43 } -- wrong types of arguments SELECT geohashesInBox(toFloat32(1.0), 2.0, 3.0, 4.0, 5); -- { serverError 43 } -- all lats and longs should be of the same type SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 12); -- { serverError 128 } -- to many elements in array + diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference new file mode 100644 index 00000000000..3a6fe59ae6d --- /dev/null +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference @@ -0,0 +1,2 @@ +5 1 +10 2 diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql new file mode 100644 index 00000000000..df3b7cb505a --- /dev/null +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql @@ -0,0 +1,26 @@ +SET allow_experimental_live_view = 1; + +DROP TABLE IF EXISTS lv; +DROP TABLE IF EXISTS lv2; +DROP TABLE IF EXISTS mt; + +CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); +CREATE LIVE VIEW lv AS SELECT sum(a) AS sum_a FROM mt PREWHERE a > 1; +CREATE LIVE VIEW lv2 AS SELECT sum(number) AS sum_number FROM system.numbers PREWHERE number > 1; + +INSERT INTO mt VALUES (1),(2),(3); + +SELECT *,_version FROM lv; +SELECT *,_version FROM lv PREWHERE sum_a > 5; -- { serverError 182 } + +INSERT INTO mt VALUES (1),(2),(3); + +SELECT *,_version FROM lv; +SELECT *,_version FROM lv PREWHERE sum_a > 10; -- { serverError 182 } + +SELECT *,_version FROM lv2; -- { serverError 182 } +SELECT *,_version FROM lv2 PREWHERE sum_number > 10; -- { serverError 182 } + +DROP TABLE lv; +DROP TABLE lv2; +DROP TABLE mt; diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index 37ed463f59b..dbd53d6d0b7 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -97,7 +97,7 @@ done echo "Equal number of columns" # This alter will finish all previous, but replica 1 maybe still not up-to-date -while [[ $(timeout 30 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_1 MODIFY COLUMN value0 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do +while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_1 MODIFY COLUMN value0 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 done diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index 114008ded26..90172d38cfb 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -96,8 +96,12 @@ done # This alter will finish all previous, but replica 1 maybe still not up-to-date -while [[ $(timeout 30 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_detach_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do +while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_detach_1 MODIFY COLUMN value1 String SETTINGS 
replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 + # just try to attach table if it failed for some reason in the code above + for i in `seq $REPLICAS`; do + $CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_alter_detach_$i" 2> /dev/null + done done for i in `seq $REPLICAS`; do diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index bacc742d16a..05ef4a1a675 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -100,8 +100,14 @@ wait echo "Finishing alters" -# This alter will finish all previous, but replica 1 maybe still not up-to-date -while [[ $(timeout 30 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do +# This alter will finish all previous, but replica 1 maybe still not up-to-date. +# If query will throw something, than we will sleep 1 and retry. If timeout +# happened we will silently go out of loop and probably fail tests in the +# following for loop. +# +# 120 seconds is more than enough, but in rare cases for slow builds (debug, +# thread) it maybe necessary. +while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 done diff --git a/tests/queries/0_stateless/01087_table_function_generate.sql b/tests/queries/0_stateless/01087_table_function_generate.sql index 96db6803a47..05f03a5a4e6 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/tests/queries/0_stateless/01087_table_function_generate.sql @@ -33,11 +33,11 @@ LIMIT 10; SELECT '-'; SELECT toTypeName(i)s -FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))') +FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))') LIMIT 1; SELECT i -FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 1, 10, 10) +FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference new file mode 100644 index 00000000000..d1b29b46df6 --- /dev/null +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference @@ -0,0 +1,29 @@ +1 l \N Nullable(String) +2 \N Nullable(String) +1 l \N Nullable(String) +2 \N Nullable(String) +- +1 l \N Nullable(String) +0 \N Nullable(String) +0 \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) +0 \N Nullable(String) +0 \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) +2 \N Nullable(String) +1 l \N Nullable(String) +2 \N Nullable(String) +- +1 l \N Nullable(String) +\N \N Nullable(String) +1 l \N Nullable(String) +\N \N Nullable(String) +- +1 l \N Nullable(String) +\N \N Nullable(String) +1 l \N Nullable(String) +\N \N Nullable(String) diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql new file mode 100644 index 00000000000..edaf2870e89 --- /dev/null +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql @@ -0,0 +1,50 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS nr; + +CREATE TABLE t (`x` UInt32, `lc` 
LowCardinality(String)) ENGINE = Memory; +CREATE TABLE nr (`x` Nullable(UInt32), `lc` Nullable(String)) ENGINE = Memory; + +INSERT INTO t VALUES (1, 'l'); +INSERT INTO nr VALUES (2, NULL); + +SET join_use_nulls = 0; + +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY x; + +SELECT '-'; + +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; + +SELECT '-'; + +SET join_use_nulls = 1; + +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY x; + +SELECT '-'; + +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; + + +DROP TABLE t; +DROP TABLE nr; diff --git a/tests/queries/0_stateless/01143_trivial_count_with_join.reference b/tests/queries/0_stateless/01143_trivial_count_with_join.reference new file mode 100644 index 00000000000..9c3f6a570ce --- /dev/null +++ b/tests/queries/0_stateless/01143_trivial_count_with_join.reference @@ -0,0 +1,5 @@ +4 +4 +4 +4 +4 diff --git a/tests/queries/0_stateless/01143_trivial_count_with_join.sql b/tests/queries/0_stateless/01143_trivial_count_with_join.sql new file mode 100644 index 00000000000..d31750e37dc --- /dev/null +++ b/tests/queries/0_stateless/01143_trivial_count_with_join.sql @@ -0,0 +1,10 @@ +drop table if exists t; +create table t engine Memory as select * from numbers(2); + +select count(*) from t, numbers(2) r; +select count(*) from t cross join numbers(2) r; +select count() from t cross join numbers(2) r; +select count(t.number) from t cross join numbers(2) r; +select count(r.number) from t cross join numbers(2) r; + +drop table t; diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh index e258f7faafa..1367b68a61c 100755 --- a/tests/queries/0_stateless/01268_procfs_metrics.sh +++ b/tests/queries/0_stateless/01268_procfs_metrics.sh @@ -17,14 +17,16 @@ function read_numbers_func() function show_processes_func() { - sleep 0.1; - - # These two system metrics for the generating query above are guaranteed to be nonzero when ProcFS is mounted at /proc - $CLICKHOUSE_CLIENT -q " - SELECT count() > 
0 FROM system.processes\ - WHERE has(ProfileEvents.Names, 'OSCPUVirtualTimeMicroseconds') AND has(ProfileEvents.Names, 'OSReadChars')\ - SETTINGS max_threads = 1 - "; + while true; do + sleep 0.1; + + # These two system metrics for the generating query above are guaranteed to be nonzero when ProcFS is mounted at /proc + $CLICKHOUSE_CLIENT -q " + SELECT count() > 0 FROM system.processes\ + WHERE has(ProfileEvents.Names, 'OSCPUVirtualTimeMicroseconds') AND has(ProfileEvents.Names, 'OSReadChars')\ + SETTINGS max_threads = 1 + " | grep '1' && break; + done } diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func.sql b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func.sql index 3550ed64e8c..d0e8fa426cf 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func.sql +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func.sql @@ -1,9 +1,11 @@ -set optimize_arithmetic_operations_in_agr_func = 1; +set optimize_arithmetic_operations_in_aggregate_functions = 1; + SELECT sum(number * -3) + min(2 * number * -3) - max(-1 * -2 * number * -3) FROM numbers(10000000); SELECT max(log(2) * number) FROM numbers(10000000); SELECT round(max(log(2) * 3 * sin(0.3) * number * 4)) FROM numbers(10000000); -set optimize_arithmetic_operations_in_agr_func = 0; +set optimize_arithmetic_operations_in_aggregate_functions = 0; + SELECT sum(number * -3) + min(2 * number * -3) - max(-1 * -2 * number * -3) FROM numbers(10000000); SELECT max(log(2) * number) FROM numbers(10000000); SELECT round(max(log(2) * 3 * sin(0.3) * number * 4)) FROM numbers(10000000); diff --git a/tests/queries/0_stateless/01291_aggregation_in_order.reference b/tests/queries/0_stateless/01291_aggregation_in_order.reference new file mode 100644 index 00000000000..c072a8aed3e --- /dev/null +++ b/tests/queries/0_stateless/01291_aggregation_in_order.reference @@ -0,0 +1,41 @@ +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +2 1 +2 2 +2 3 +2 4 +1 +2 +1 1 101 1 +1 2 102 1 +1 3 103 1 +1 4 104 1 +1 5 104 1 +1 6 105 1 +2 1 213 2 +2 2 107 2 +2 3 108 2 +2 4 109 2 +1 619 1 +2 537 2 +1 619 1 +2 537 2 +2019-05-05 00:00:00 -45363190 +2019-05-05 00:00:00 -1249512288 +2019-05-05 00:00:00 345522721 +2019-05-05 00:00:00 486601715 +2019-05-05 00:00:00 1449669396 +2019-05-05 00:00:00 45 +2019-05-06 00:00:00 46 +2019-05-07 00:00:00 47 +2019-05-08 00:00:00 48 +2019-05-09 00:00:00 49 +2019-05-05 00:00:00 0 1900940608 +2019-05-06 00:00:00 1 1857737272 +2019-05-07 00:00:00 2 1996614413 +2019-05-08 00:00:00 3 1873725230 +2019-05-09 00:00:00 4 1831412253 diff --git a/tests/queries/0_stateless/01291_aggregation_in_order.sql b/tests/queries/0_stateless/01291_aggregation_in_order.sql new file mode 100644 index 00000000000..753075f2757 --- /dev/null +++ b/tests/queries/0_stateless/01291_aggregation_in_order.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS pk_order; + +SET optimize_aggregation_in_order = 1; + +CREATE TABLE pk_order(a UInt64, b UInt64, c UInt64, d UInt64) ENGINE=MergeTree() ORDER BY (a, b); +INSERT INTO pk_order(a, b, c, d) VALUES (1, 1, 101, 1), (1, 2, 102, 1), (1, 3, 103, 1), (1, 4, 104, 1); +INSERT INTO pk_order(a, b, c, d) VALUES (1, 5, 104, 1), (1, 6, 105, 1), (2, 1, 106, 2), (2, 1, 107, 2); +INSERT INTO pk_order(a, b, c, d) VALUES (2, 2, 107, 2), (2, 3, 108, 2), (2, 4, 109, 2); + +-- Order after group by in order is determined + +SELECT a, b FROM pk_order GROUP BY a, b; +SELECT a FROM pk_order GROUP BY a; + +SELECT a, b, sum(c), avg(d) FROM pk_order GROUP BY a, b; +SELECT a, 
sum(c), avg(d) FROM pk_order GROUP BY a; +SELECT a, sum(c), avg(d) FROM pk_order GROUP BY -a; + +DROP TABLE IF EXISTS pk_order; + +CREATE TABLE pk_order (d DateTime, a Int32, b Int32) ENGINE = MergeTree ORDER BY (d, a) + PARTITION BY toDate(d) SETTINGS index_granularity=1; + +INSERT INTO pk_order + SELECT toDateTime('2019-05-05 00:00:00') + INTERVAL number % 10 DAY, number, intHash32(number) from numbers(100); + +set max_block_size = 1; + +SELECT d, max(b) FROM pk_order GROUP BY d, a LIMIT 5; +SELECT d, avg(a) FROM pk_order GROUP BY toString(d) LIMIT 5; +SELECT toStartOfHour(d) as d1, min(a), max(b) FROM pk_order GROUP BY d1 LIMIT 5; + +DROP TABLE pk_order; diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.reference b/tests/queries/0_stateless/01293_system_distribution_queue.reference new file mode 100644 index 00000000000..a2c1e5f2a7b --- /dev/null +++ b/tests/queries/0_stateless/01293_system_distribution_queue.reference @@ -0,0 +1,6 @@ +INSERT +1 0 1 1 +FLUSH +1 0 0 0 +UNBLOCK +0 0 0 0 diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.sql b/tests/queries/0_stateless/01293_system_distribution_queue.sql new file mode 100644 index 00000000000..c0ff6a21e8e --- /dev/null +++ b/tests/queries/0_stateless/01293_system_distribution_queue.sql @@ -0,0 +1,21 @@ +drop table if exists null_01293; +drop table if exists dist_01293; + +create table null_01293 (key Int) engine=Null(); +create table dist_01293 as null_01293 engine=Distributed(test_cluster_two_shards, currentDatabase(), null_01293, key); + +-- no rows, since no active monitor +select * from system.distribution_queue; + +select 'INSERT'; +system stop distributed sends dist_01293; +insert into dist_01293 select * from numbers(10); +select is_blocked, error_count, data_files, data_compressed_bytes>100 from system.distribution_queue; +system flush distributed dist_01293; + +select 'FLUSH'; +select is_blocked, error_count, data_files, data_compressed_bytes from system.distribution_queue; + +select 'UNBLOCK'; +system start distributed sends dist_01293; +select is_blocked, error_count, data_files, data_compressed_bytes from system.distribution_queue; diff --git a/tests/queries/0_stateless/01294_system_distributed_on_cluster.reference b/tests/queries/0_stateless/01294_system_distributed_on_cluster.reference new file mode 100644 index 00000000000..a8b5d159c9c --- /dev/null +++ b/tests/queries/0_stateless/01294_system_distributed_on_cluster.reference @@ -0,0 +1,3 @@ +localhost 9000 0 0 0 +localhost 9000 0 0 0 +localhost 9000 0 0 0 diff --git a/tests/queries/0_stateless/01294_system_distributed_on_cluster.sql b/tests/queries/0_stateless/01294_system_distributed_on_cluster.sql new file mode 100644 index 00000000000..d56bddba3c6 --- /dev/null +++ b/tests/queries/0_stateless/01294_system_distributed_on_cluster.sql @@ -0,0 +1,21 @@ +-- just a smoke test + +-- quirk for ON CLUSTER does not uses currentDatabase() +drop database if exists db_01294; +create database db_01294; + +drop table if exists db_01294.dist_01294; +create table db_01294.dist_01294 as system.one engine=Distributed(test_shard_localhost, system, one); +-- flush +system flush distributed db_01294.dist_01294; +system flush distributed on cluster test_shard_localhost db_01294.dist_01294; +-- stop +system stop distributed sends; +system stop distributed sends db_01294.dist_01294; +system stop distributed sends on cluster test_shard_localhost db_01294.dist_01294; +-- start +system start distributed sends; +system start distributed sends 
db_01294.dist_01294; +system start distributed sends on cluster test_shard_localhost db_01294.dist_01294; + +drop database db_01294; diff --git a/tests/queries/0_stateless/01295_aggregation_bug_11413.reference b/tests/queries/0_stateless/01295_aggregation_bug_11413.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01295_aggregation_bug_11413.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01295_aggregation_bug_11413.sql b/tests/queries/0_stateless/01295_aggregation_bug_11413.sql new file mode 100644 index 00000000000..ec43be9eab3 --- /dev/null +++ b/tests/queries/0_stateless/01295_aggregation_bug_11413.sql @@ -0,0 +1 @@ +SELECT 1 FROM remote('127.0.0.{1,2}', numbers(99)) GROUP BY materialize(1) HAVING count() > 0 AND argMax(1, tuple(0)) diff --git a/tests/queries/0_stateless/01296_codecs_bad_arguments.reference b/tests/queries/0_stateless/01296_codecs_bad_arguments.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01296_codecs_bad_arguments.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01296_codecs_bad_arguments.sql b/tests/queries/0_stateless/01296_codecs_bad_arguments.sql new file mode 100644 index 00000000000..d7eb53300ec --- /dev/null +++ b/tests/queries/0_stateless/01296_codecs_bad_arguments.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS delta_table; +DROP TABLE IF EXISTS zstd_table; +DROP TABLE IF EXISTS lz4_table; + +CREATE TABLE delta_table (`id` UInt64 CODEC(Delta(tuple()))) ENGINE = MergeTree() ORDER BY tuple(); --{serverError 433} +CREATE TABLE zstd_table (`id` UInt64 CODEC(ZSTD(tuple()))) ENGINE = MergeTree() ORDER BY tuple(); --{serverError 433} +CREATE TABLE lz4_table (`id` UInt64 CODEC(LZ4HC(tuple()))) ENGINE = MergeTree() ORDER BY tuple(); --{serverError 433} + +CREATE TABLE lz4_table (`id` UInt64 CODEC(LZ4(tuple()))) ENGINE = MergeTree() ORDER BY tuple(); --{serverError 378} + +SELECT 1; + +DROP TABLE IF EXISTS delta_table; +DROP TABLE IF EXISTS zstd_table; +DROP TABLE IF EXISTS lz4_table; diff --git a/tests/queries/0_stateless/01296_pipeline_stuck.reference b/tests/queries/0_stateless/01296_pipeline_stuck.reference new file mode 100644 index 00000000000..ed8de641763 --- /dev/null +++ b/tests/queries/0_stateless/01296_pipeline_stuck.reference @@ -0,0 +1,13 @@ +1 +INSERT SELECT +1 +1 +INSERT SELECT max_threads +1 +1 +1 +INSERT SELECT max_insert_threads max_threads +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01296_pipeline_stuck.sql b/tests/queries/0_stateless/01296_pipeline_stuck.sql new file mode 100644 index 00000000000..eeb67362634 --- /dev/null +++ b/tests/queries/0_stateless/01296_pipeline_stuck.sql @@ -0,0 +1,18 @@ +drop table if exists data_01295; +create table data_01295 (key Int) Engine=AggregatingMergeTree() order by key; + +insert into data_01295 values (1); +select * from data_01295; + +select 'INSERT SELECT'; +insert into data_01295 select * from data_01295; -- no stuck for now +select * from data_01295; + +select 'INSERT SELECT max_threads'; +insert into data_01295 select * from data_01295 final settings max_threads=2; -- stuck with multiple threads +select * from data_01295; + +select 'INSERT SELECT max_insert_threads max_threads'; +set max_insert_threads=2; +insert into data_01295 select * from data_01295 final settings max_threads=2; -- no stuck for now +select * from data_01295; diff --git a/tests/queries/0_stateless/01297_alter_distributed.reference 
b/tests/queries/0_stateless/01297_alter_distributed.reference new file mode 100644 index 00000000000..bd269322884 --- /dev/null +++ b/tests/queries/0_stateless/01297_alter_distributed.reference @@ -0,0 +1,18 @@ +CounterID UInt32 +dummy String +StartDate Date +Sign Int8 +VisitID UInt64 +UserID UInt64 +StartTime DateTime +ClickLogID UInt64 +CREATE TABLE default.merge_distributed\n(\n `CounterID` UInt32, \n `dummy` String, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Distributed(\'test_shard_localhost\', \'default\', \'merge_distributed1\') +1 Hello, Alter Table! +CounterID UInt32 +StartDate Date +Sign Int8 +VisitID UInt64 +UserID UInt64 +StartTime DateTime +ClickLogID UInt64 +CREATE TABLE default.merge_distributed\n(\n `CounterID` UInt32, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Distributed(\'test_shard_localhost\', \'default\', \'merge_distributed1\') diff --git a/tests/queries/0_stateless/01297_alter_distributed.sql b/tests/queries/0_stateless/01297_alter_distributed.sql new file mode 100644 index 00000000000..d5359cc5ea8 --- /dev/null +++ b/tests/queries/0_stateless/01297_alter_distributed.sql @@ -0,0 +1,28 @@ +drop table if exists merge_distributed; +drop table if exists merge_distributed1; + +create table merge_distributed1 ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); +insert into merge_distributed1 values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); + +create table merge_distributed ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = Distributed(test_shard_localhost, currentDatabase(), merge_distributed1); + +alter table merge_distributed1 add column dummy String after CounterID; +alter table merge_distributed add column dummy String after CounterID; + +describe table merge_distributed; +show create table merge_distributed; + +insert into merge_distributed1 values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); +select CounterID, dummy from merge_distributed where dummy <> '' limit 10; + +alter table merge_distributed drop column dummy; + +describe table merge_distributed; +show create table merge_distributed; + +--error: should fall, because there is no `dummy1` column +alter table merge_distributed add column dummy1 String after CounterID; +select CounterID, dummy1 from merge_distributed where dummy1 <> '' limit 10; -- { serverError 47 } + +drop table merge_distributed; +drop table merge_distributed1; diff --git a/tests/queries/0_stateless/01298_alter_merge.reference b/tests/queries/0_stateless/01298_alter_merge.reference new file mode 100644 index 00000000000..393c0a600ff --- /dev/null +++ b/tests/queries/0_stateless/01298_alter_merge.reference @@ -0,0 +1,17 @@ +CounterID UInt32 +dummy String +StartDate Date +Sign Int8 +VisitID UInt64 +UserID UInt64 +StartTime DateTime +ClickLogID UInt64 +CREATE TABLE default.merge\n(\n `CounterID` UInt32, \n `dummy` String, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Merge(\'default\', \'merge\\\\[0-9\\\\]\') +CounterID UInt32 +StartDate Date +Sign 
Int8 +VisitID UInt64 +UserID UInt64 +StartTime DateTime +ClickLogID UInt64 +CREATE TABLE default.merge\n(\n `CounterID` UInt32, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Merge(\'default\', \'merge\\\\[0-9\\\\]\') diff --git a/tests/queries/0_stateless/01298_alter_merge.sql b/tests/queries/0_stateless/01298_alter_merge.sql new file mode 100644 index 00000000000..86c89c38c8c --- /dev/null +++ b/tests/queries/0_stateless/01298_alter_merge.sql @@ -0,0 +1,36 @@ +drop table if exists merge; +drop table if exists merge1; +drop table if exists merge2; + +create table merge1 ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); +insert into merge1 values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); + +create table merge2 ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); +insert into merge2 values (2, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); + +create table merge ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = Merge(currentDatabase(), 'merge\[0-9\]'); + +alter table merge1 add column dummy String after CounterID; +alter table merge2 add column dummy String after CounterID; +alter table merge add column dummy String after CounterID; + +describe table merge; +show create table merge; + +insert into merge1 values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); + +select CounterID, dummy from merge where dummy <> '' limit 10; + + +alter table merge drop column dummy; + +describe table merge; +show create table merge; + +--error: must correctly fall into the alter +alter table merge add column dummy1 String after CounterID; +select CounterID, dummy1 from merge where dummy1 <> '' limit 10; + +drop table merge; +drop table merge1; +drop table merge2; diff --git a/tests/queries/0_stateless/01299_alter_merge_tree.reference b/tests/queries/0_stateless/01299_alter_merge_tree.reference new file mode 100644 index 00000000000..d641115026b --- /dev/null +++ b/tests/queries/0_stateless/01299_alter_merge_tree.reference @@ -0,0 +1,16 @@ +CounterID UInt32 +dummy String +StartDate Date +Sign Int8 +VisitID UInt64 +UserID UInt64 +StartTime DateTime +ClickLogID UInt64 +1 Hello, Alter Table! 
+CounterID UInt32 +StartDate Date +Sign Int8 +VisitID UInt64 +UserID UInt64 +StartTime DateTime +ClickLogID UInt64 diff --git a/tests/queries/0_stateless/01299_alter_merge_tree.sql b/tests/queries/0_stateless/01299_alter_merge_tree.sql new file mode 100644 index 00000000000..87608e6d15a --- /dev/null +++ b/tests/queries/0_stateless/01299_alter_merge_tree.sql @@ -0,0 +1,17 @@ +drop table if exists merge_tree; + +create table merge_tree ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); + +insert into merge_tree values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3) +alter table merge_tree add column dummy String after CounterID; +describe table merge_tree; + +insert into merge_tree values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3) + +select CounterID, dummy from merge_tree where dummy <> '' limit 10; + +alter table merge_tree drop column dummy; + +describe table merge_tree; + +drop table merge_tree; diff --git a/tests/integration/test_s3_with_proxy/proxy-resolver/__init__.py b/tests/queries/0_stateless/01300_client_save_history_when_terminated.reference similarity index 100% rename from tests/integration/test_s3_with_proxy/proxy-resolver/__init__.py rename to tests/queries/0_stateless/01300_client_save_history_when_terminated.reference diff --git a/tests/queries/0_stateless/01300_client_save_history_when_terminated.sh b/tests/queries/0_stateless/01300_client_save_history_when_terminated.sh new file mode 100755 index 00000000000..5ffcbbda883 --- /dev/null +++ b/tests/queries/0_stateless/01300_client_save_history_when_terminated.sh @@ -0,0 +1,35 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 60 +match_max 100000 + +spawn clickhouse-client +expect ":) " + +# Make a query +send -- "SELECT 'for the history'\r" +expect "for the history" +expect ":) " + +# Kill the client to check if the history was saved +exec kill -9 [exp_pid] +close + +# Run client one more time and press "up" to see the last recorded query +spawn clickhouse-client +expect ":) " +send -- "\[A" +expect "SELECT 'for the history'" + +# Will check that Ctrl+C clears current line. +send -- "\3" +expect ":)" + +# Will check that second Ctrl+C invocation does not exit from client. +send -- "\3" +expect ":)" + +# But Ctrl+D does. +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference new file mode 100644 index 00000000000..b20e7415f52 --- /dev/null +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference @@ -0,0 +1,2 @@ +Memory limit (for query) exceeded +Ok diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh new file mode 100755 index 00000000000..633fa5ce315 --- /dev/null +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +function test() +{ + for i in {1..1000}; do + $CLICKHOUSE_CLIENT --max_memory_usage 1G <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; + done +} + +export -f test; + +# If the memory leak exists, it will lead to OOM fairly quickly. +timeout 30 bash -c test 2>&1 | grep -o -F 'Memory limit (for query) exceeded' | uniq +echo 'Ok' diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh new file mode 100755 index 00000000000..cd2fec408ab --- /dev/null +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +function test() +{ + for i in {1..250}; do + $CLICKHOUSE_CLIENT --query "SELECT groupArrayIfState(('Hello, world' AS s) || s || s || s || s || s || s || s || s || s, NOT throwIf(number > 10000000, 'Ok')) FROM system.numbers_mt GROUP BY number % 10"; + done +} + +export -f test; + +# If the memory leak exists, it will lead to OOM fairly quickly. +timeout 30 bash -c test 2>&1 | grep -o -F 'Ok' | uniq diff --git a/tests/queries/1_stateful/00004_top_counters.reference b/tests/queries/1_stateful/00004_top_counters.reference index cf2824e45b0..e2d584170c0 100644 --- a/tests/queries/1_stateful/00004_top_counters.reference +++ b/tests/queries/1_stateful/00004_top_counters.reference @@ -8,3 +8,13 @@ 59183 85379 33010362 77807 800784 77492 +1704509 523264 +732797 475698 +598875 337212 +792887 252197 +3807842 196036 +25703952 147211 +716829 90109 +59183 85379 +33010362 77807 +800784 77492 diff --git a/tests/queries/1_stateful/00004_top_counters.sql b/tests/queries/1_stateful/00004_top_counters.sql index 045f940da42..abdd5ac794a 100644 --- a/tests/queries/1_stateful/00004_top_counters.sql +++ b/tests/queries/1_stateful/00004_top_counters.sql @@ -1 +1,2 @@ -SELECT CounterID, count() AS c FROM test.hits GROUP BY CounterID ORDER BY c DESC LIMIT 10 +SELECT CounterID, count() AS c FROM test.hits GROUP BY CounterID ORDER BY c DESC LIMIT 10; +SELECT CounterID, count() AS c FROM test.hits GROUP BY CounterID ORDER BY c DESC LIMIT 10 SETTINGS optimize_aggregation_in_order = 1 diff --git a/tests/queries/1_stateful/00047_bar.reference b/tests/queries/1_stateful/00047_bar.reference index 61999ae73c9..c038f59946e 100644 --- a/tests/queries/1_stateful/00047_bar.reference +++ b/tests/queries/1_stateful/00047_bar.reference @@ -98,3 +98,103 @@ 7901143 10022 █▌ 194599 9997 █▌ 21052498 9780 █▍ +1704509 523264 ████████████████████████████████████████████████████████████████████████████████ +732797 475698 ████████████████████████████████████████████████████████████████████████▋ +598875 337212 ███████████████████████████████████████████████████▌ +792887 252197 ██████████████████████████████████████▌ +3807842 196036 █████████████████████████████▊ +25703952 147211 ██████████████████████▌ +716829 90109 █████████████▋ +59183 85379 █████████████ +33010362 77807 ███████████▊ +800784 77492 ███████████▋ +20810645 73213 ███████████▏ +25843850 68945 ██████████▌ +23447120 67570 
██████████▎ +14739804 64174 █████████▋ +32077710 60456 █████████▏ +22446879 58389 ████████▊ +170282 57017 ████████▋ +11482817 52345 ████████ +63469 52142 ███████▊ +29103473 47758 ███████▎ +10136747 44080 ██████▋ +27528801 43395 ██████▋ +10581377 43279 ██████▌ +9841201 40581 ██████▏ +20310963 37562 █████▋ +17337667 34301 █████▏ +28600281 32776 █████ +32046685 28788 ████▍ +10130880 26603 ████ +8676831 25733 ███▊ +53230 25595 ███▊ +20271226 25585 ███▊ +17420663 25496 ███▊ +631207 25270 ███▋ +633130 24744 ███▋ +14324015 23349 ███▌ +8537965 21270 ███▎ +11285298 20825 ███▏ +14937615 20788 ███▏ +185050 20785 ███▏ +16368233 19897 ███ +81602 19724 ███ +62896 19717 ███ +12967664 19402 ██▊ +15996597 18557 ██▋ +4379238 18370 ██▋ +90982 17443 ██▋ +18211045 17390 ██▋ +14625884 17302 ██▋ +12864910 17279 ██▋ +126096 16959 ██▌ +30296134 16849 ██▌ +26360482 16175 ██▍ +17788950 16017 ██▍ +5928716 15340 ██▎ +15469035 15171 ██▎ +29732125 15146 ██▎ +32946244 15104 ██▎ +20957241 14719 ██▎ +9495695 14584 ██▏ +29241146 14540 ██▏ +109805 14199 ██▏ +26905788 13972 ██▏ +212019 13930 ██▏ +171509 13792 ██ +23913162 13615 ██ +1861993 13509 ██ +125776 13308 ██ +11312316 13181 ██ +32667326 13181 ██ +28628973 12922 █▊ +122804 12520 █▊ +12322758 12352 █▊ +1301819 12283 █▊ +10769545 12183 █▋ +21566939 12170 █▋ +28905364 12158 █▋ +4250765 12049 █▋ +15009727 11818 █▋ +12761932 11733 █▋ +26995888 11658 █▋ +12759346 11514 █▋ +1507911 11452 █▋ +968488 11444 █▋ +15736172 11358 █▋ +54310 11193 █▋ +17027391 11047 █▋ +17439919 10936 █▋ +4480860 10747 █▋ +26738469 10738 █▋ +9986231 10656 █▋ +1539995 10655 █▋ +214556 10625 █▌ +219339 10522 █▌ +3266 10503 █▌ +30563429 10128 █▌ +1960469 10098 █▌ +7901143 10022 █▌ +194599 9997 █▌ +21052498 9780 █▍ diff --git a/tests/queries/1_stateful/00047_bar.sql b/tests/queries/1_stateful/00047_bar.sql index c7310763525..37c420b91ff 100644 --- a/tests/queries/1_stateful/00047_bar.sql +++ b/tests/queries/1_stateful/00047_bar.sql @@ -1 +1,2 @@ -SELECT CounterID, count() AS c, bar(c, 0, 523264) FROM test.hits GROUP BY CounterID ORDER BY c DESC, CounterID ASC LIMIT 100 +SELECT CounterID, count() AS c, bar(c, 0, 523264) FROM test.hits GROUP BY CounterID ORDER BY c DESC, CounterID ASC LIMIT 100; +SELECT CounterID, count() AS c, bar(c, 0, 523264) FROM test.hits GROUP BY CounterID ORDER BY c DESC, CounterID ASC LIMIT 100 SETTINGS optimize_aggregation_in_order = 1 diff --git a/tests/queries/1_stateful/00049_max_string_if.reference b/tests/queries/1_stateful/00049_max_string_if.reference index f87bc6d1fd2..6897a773c87 100644 --- a/tests/queries/1_stateful/00049_max_string_if.reference +++ b/tests/queries/1_stateful/00049_max_string_if.reference @@ -18,3 +18,23 @@ 11482817 52345 я скачать игры 63469 52142 яндекс марте рокус надписями я любимому у полосы фото минск 29103473 47758 +1704509 523264 نيك امريكي نيك افلام سكس جامد +732797 475698 نيك سكس سيحاق +598875 337212 سکس باصات +792887 252197 №2267 отзыв +3807842 196036 ярмаркетовара 200кг купить по неделю тебелье +25703952 147211 +716829 90109 яндекс повыш +59183 85379 франция машину угловы крузер из кофе +33010362 77807 ярмаркетовара 200кг купить по неделю тебелье +800784 77492 ярмаркур смерти теплицы из чего +20810645 73213 ярмаркетовара 200кг купить по неделю тебе перево метиков детский +25843850 68945 электросчет-фактура +23447120 67570 южная степанов +14739804 64174 штангал волк +32077710 60456 +22446879 58389 فیلم سكس امريكي نيك +170282 57017 ل افلام السكس +11482817 52345 я скачать игры +63469 52142 яндекс марте рокус надписями я любимому у полосы фото 
минск +29103473 47758 diff --git a/tests/queries/1_stateful/00049_max_string_if.sql b/tests/queries/1_stateful/00049_max_string_if.sql index af87123ef02..5c6d4274bab 100644 --- a/tests/queries/1_stateful/00049_max_string_if.sql +++ b/tests/queries/1_stateful/00049_max_string_if.sql @@ -1 +1,2 @@ -SELECT CounterID, count(), maxIf(SearchPhrase, notEmpty(SearchPhrase)) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 +SELECT CounterID, count(), maxIf(SearchPhrase, notEmpty(SearchPhrase)) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; +SELECT CounterID, count(), maxIf(SearchPhrase, notEmpty(SearchPhrase)) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 SETTINGS optimize_aggregation_in_order = 1 diff --git a/tests/queries/1_stateful/00050_min_max.reference b/tests/queries/1_stateful/00050_min_max.reference index ab47fd7a69a..91473c4ea17 100644 --- a/tests/queries/1_stateful/00050_min_max.reference +++ b/tests/queries/1_stateful/00050_min_max.reference @@ -18,3 +18,23 @@ 11482817 4611708000353743073 9223337838355779113 63469 4611695097019173921 9223353530156141191 29103473 4611744585914335132 9223333530281362537 +1704509 4611700827100483880 9223360787015464643 +732797 4611701940806302259 9223355550934604746 +598875 4611701407242345792 9223362250391155632 +792887 4611699550286611812 9223290551912005343 +3807842 4611710821592843606 9223326163906184987 +25703952 4611709443519524003 9223353913449113943 +716829 4611852156092872082 9223361623076951140 +59183 4611730685242027332 9223354909338698162 +33010362 4611704682869732882 9223268545373999677 +800784 4611752907938305166 9223340418389788041 +20810645 4611712185532639162 9223218900001937412 +25843850 4611690025407720929 9223346023778617822 +23447120 4611796031755620254 9223329309291309758 +14739804 4611692230555590277 9223313509005166531 +32077710 4611884228437061959 9223352444952988904 +22446879 4611846229717089436 9223124373140579096 +170282 4611833225706935900 9223371583739401906 +11482817 4611708000353743073 9223337838355779113 +63469 4611695097019173921 9223353530156141191 +29103473 4611744585914335132 9223333530281362537 diff --git a/tests/queries/1_stateful/00050_min_max.sql b/tests/queries/1_stateful/00050_min_max.sql index 4c45f6fffa6..1ca93a5d620 100644 --- a/tests/queries/1_stateful/00050_min_max.sql +++ b/tests/queries/1_stateful/00050_min_max.sql @@ -1 +1,2 @@ -SELECT CounterID, min(WatchID), max(WatchID) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 +SELECT CounterID, min(WatchID), max(WatchID) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; +SELECT CounterID, min(WatchID), max(WatchID) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 SETTINGS optimize_aggregation_in_order = 1 diff --git a/tests/queries/1_stateful/00051_min_max_array.reference b/tests/queries/1_stateful/00051_min_max_array.reference index a5f1b6cdfef..b5555954099 100644 --- a/tests/queries/1_stateful/00051_min_max_array.reference +++ b/tests/queries/1_stateful/00051_min_max_array.reference @@ -18,3 +18,23 @@ 11482817 52345 [] [] [] 63469 52142 [] [] [] 29103473 47758 [6185451] [] [6185451] +1704509 523264 [271264] [] [271264] +732797 475698 [] [] [] +598875 337212 [] [] [] +792887 252197 [2094893,2028343] [] [1272031] +3807842 196036 [1710269] [] [1134660] +25703952 147211 [] [] [] +716829 90109 [4186138] [] [1800405] +59183 85379 [] [] [] +33010362 77807 [] [] [] +800784 77492 [4002316] [] [1270480] +20810645 73213 [] [] [] +25843850 68945 [4028285] [] [4028285] 
+23447120 67570 [6503091,2762273] [] [2098132] +14739804 64174 [4180720] [] [664490] +32077710 60456 [] [] [] +22446879 58389 [] [] [] +170282 57017 [4166114] [] [34386,1240412,1248634,1616213,2928740,1458582] +11482817 52345 [] [] [] +63469 52142 [] [] [] +29103473 47758 [6185451] [] [6185451] diff --git a/tests/queries/1_stateful/00051_min_max_array.sql b/tests/queries/1_stateful/00051_min_max_array.sql index 1027586372d..adf44fb9c22 100644 --- a/tests/queries/1_stateful/00051_min_max_array.sql +++ b/tests/queries/1_stateful/00051_min_max_array.sql @@ -1 +1,2 @@ -SELECT CounterID, count(), max(GoalsReached), min(GoalsReached), minIf(GoalsReached, notEmpty(GoalsReached)) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 +SELECT CounterID, count(), max(GoalsReached), min(GoalsReached), minIf(GoalsReached, notEmpty(GoalsReached)) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; +SELECT CounterID, count(), max(GoalsReached), min(GoalsReached), minIf(GoalsReached, notEmpty(GoalsReached)) FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 SETTINGS optimize_aggregation_in_order = 1 diff --git a/tests/queries/1_stateful/00087_where_0.sql b/tests/queries/1_stateful/00087_where_0.sql index c55617d2245..33c325e53b8 100644 --- a/tests/queries/1_stateful/00087_where_0.sql +++ b/tests/queries/1_stateful/00087_where_0.sql @@ -1,3 +1,5 @@ SET max_rows_to_read = 1000; SELECT CounterID, uniq(UserID) FROM test.hits WHERE 0 != 0 GROUP BY CounterID; +SELECT CounterID, uniq(UserID) FROM test.hits WHERE 0 != 0 GROUP BY CounterID SETTINGS optimize_aggregation_in_order = 1; SELECT CounterID, uniq(UserID) FROM test.hits WHERE 0 AND CounterID = 1704509 GROUP BY CounterID; +SELECT CounterID, uniq(UserID) FROM test.hits WHERE 0 AND CounterID = 1704509 GROUP BY CounterID SETTINGS optimize_aggregation_in_order = 1; diff --git a/tests/queries/1_stateful/00149_quantiles_timing_distributed.reference b/tests/queries/1_stateful/00149_quantiles_timing_distributed.reference index 8ac5f01c7cc..e31a1e90d87 100644 --- a/tests/queries/1_stateful/00149_quantiles_timing_distributed.reference +++ b/tests/queries/1_stateful/00149_quantiles_timing_distributed.reference @@ -1 +1,2 @@ 10726001768429413598 +10726001768429413598 diff --git a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql index b195518e1e7..dc63cb5867f 100644 --- a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql +++ b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql @@ -1 +1,2 @@ SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID); +SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1; diff --git a/tests/queries/1_stateful/00150_quantiles_timing_precision.reference b/tests/queries/1_stateful/00150_quantiles_timing_precision.reference index 09aaf8449dc..79ef24af591 100644 --- a/tests/queries/1_stateful/00150_quantiles_timing_precision.reference +++ b/tests/queries/1_stateful/00150_quantiles_timing_precision.reference @@ -1 +1,2 @@ 4379238 1868 1879 5755 0.006 +4379238 1868 1879 5755 0.006 diff --git a/tests/queries/1_stateful/00150_quantiles_timing_precision.sql 
b/tests/queries/1_stateful/00150_quantiles_timing_precision.sql index 7d5b27fafd3..e858bcf34ff 100644 --- a/tests/queries/1_stateful/00150_quantiles_timing_precision.sql +++ b/tests/queries/1_stateful/00150_quantiles_timing_precision.sql @@ -1 +1,2 @@ SELECT CounterID, quantileTiming(0.5)(SendTiming) AS qt, least(30000, quantileExact(0.5)(SendTiming)) AS qe, count() AS c, round(abs(qt - qe) / greatest(qt, qe) AS diff, 3) AS rounded_diff FROM test.hits WHERE SendTiming != -1 GROUP BY CounterID HAVING diff != 0 ORDER BY diff DESC; +SELECT CounterID, quantileTiming(0.5)(SendTiming) AS qt, least(30000, quantileExact(0.5)(SendTiming)) AS qe, count() AS c, round(abs(qt - qe) / greatest(qt, qe) AS diff, 3) AS rounded_diff FROM test.hits WHERE SendTiming != -1 GROUP BY CounterID HAVING diff != 0 ORDER BY diff DESC SETTINGS optimize_aggregation_in_order = 1; diff --git a/utils/check-marks/CMakeLists.txt b/utils/check-marks/CMakeLists.txt index bfb200b8d28..2fc22a925b1 100644 --- a/utils/check-marks/CMakeLists.txt +++ b/utils/check-marks/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (check-marks main.cpp) -target_link_libraries(check-marks PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(check-marks PRIVATE dbms boost::program_options) diff --git a/utils/compressor/CMakeLists.txt b/utils/compressor/CMakeLists.txt index df32330a137..43cde973846 100644 --- a/utils/compressor/CMakeLists.txt +++ b/utils/compressor/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (decompress_perf decompress_perf.cpp) -target_link_libraries(decompress_perf PRIVATE dbms ${LZ4_LIBRARY}) +target_link_libraries(decompress_perf PRIVATE dbms lz4) diff --git a/utils/convert-month-partitioned-parts/CMakeLists.txt b/utils/convert-month-partitioned-parts/CMakeLists.txt index abfd60a07a0..14853590c76 100644 --- a/utils/convert-month-partitioned-parts/CMakeLists.txt +++ b/utils/convert-month-partitioned-parts/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (convert-month-partitioned-parts main.cpp) -target_link_libraries(convert-month-partitioned-parts PRIVATE dbms clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(convert-month-partitioned-parts PRIVATE dbms clickhouse_parsers boost::program_options) diff --git a/utils/test-data-generator/CMakeLists.txt b/utils/test-data-generator/CMakeLists.txt index 758c3cdc0ce..20c37854c0a 100644 --- a/utils/test-data-generator/CMakeLists.txt +++ b/utils/test-data-generator/CMakeLists.txt @@ -6,7 +6,7 @@ if (USE_PROTOBUF) protobuf_generate_cpp(ProtobufDelimitedMessagesSerializer_Srcs2 ProtobufDelimitedMessagesSerializer_Hdrs2 ${CMAKE_CURRENT_SOURCE_DIR}/../../tests/queries/0_stateless/00825_protobuf_format_syntax2.proto) add_executable (ProtobufDelimitedMessagesSerializer ProtobufDelimitedMessagesSerializer.cpp ${ProtobufDelimitedMessagesSerializer_Srcs} ${ProtobufDelimitedMessagesSerializer_Hdrs} ${ProtobufDelimitedMessagesSerializer_Srcs2} ${ProtobufDelimitedMessagesSerializer_Hdrs2}) target_include_directories (ProtobufDelimitedMessagesSerializer SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - target_link_libraries (ProtobufDelimitedMessagesSerializer PRIVATE ${Protobuf_LIBRARY} ${Boost_PROGRAM_OPTIONS_LIBRARY}) + target_link_libraries (ProtobufDelimitedMessagesSerializer PRIVATE ${Protobuf_LIBRARY} boost::program_options) get_filename_component(ProtobufDelimitedMessagesSerializer_OutputDir "${CMAKE_CURRENT_LIST_DIR}/../../tests/queries/0_stateless" REALPATH) target_compile_definitions(ProtobufDelimitedMessagesSerializer 
PRIVATE OUTPUT_DIR="${ProtobufDelimitedMessagesSerializer_OutputDir}") endif () diff --git a/utils/wikistat-loader/CMakeLists.txt b/utils/wikistat-loader/CMakeLists.txt index 7f72cbb9f46..96567e73790 100644 --- a/utils/wikistat-loader/CMakeLists.txt +++ b/utils/wikistat-loader/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (wikistat-loader main.cpp ${SRCS}) -target_link_libraries (wikistat-loader PRIVATE clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (wikistat-loader PRIVATE clickhouse_common_io boost::program_options) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt index 2fdd87a4412..08907e1c5b9 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt +++ b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt @@ -1,3 +1,3 @@ add_executable (zookeeper-adjust-block-numbers-to-parts main.cpp ${SRCS}) target_compile_options(zookeeper-adjust-block-numbers-to-parts PRIVATE -Wno-format) -target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE dbms clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE dbms clickhouse_common_zookeeper boost::program_options) diff --git a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt index 34f2e608ef9..7fe7fb94fa4 100644 --- a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt +++ b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-create-entry-to-download-part main.cpp ${SRCS}) -target_link_libraries (zookeeper-create-entry-to-download-part PRIVATE dbms clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (zookeeper-create-entry-to-download-part PRIVATE dbms clickhouse_common_zookeeper boost::program_options) diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index d2947fa8932..9f5da351068 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-dump-tree main.cpp ${SRCS}) -target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper clickhouse_common_io boost::program_options) diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index ba112bab9cf..c31b1ec3388 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) -target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper boost::program_options)
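A recurring pattern in the 1_stateful test changes above is to run each aggregation query twice, once as before and once with SETTINGS optimize_aggregation_in_order = 1, so that a single reference file (with its expected output duplicated) checks both execution paths. A minimal sketch of the pattern, mirroring 00004_top_counters.sql and assuming the test.hits table from the stateful dataset:

SELECT CounterID, count() AS c FROM test.hits GROUP BY CounterID ORDER BY c DESC LIMIT 10;
SELECT CounterID, count() AS c FROM test.hits GROUP BY CounterID ORDER BY c DESC LIMIT 10 SETTINGS optimize_aggregation_in_order = 1;

If the setting changed the result set, the duplicated block in the .reference file would no longer match and the test would fail.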