Merge branch 'master' of github.com:ClickHouse/ClickHouse into docs/CLICKHOUSEDOCS-658-mv-block-settings

2024-11-21 23:21:59 +00:00 · 2020-06-18 22:24:17 +03:00 · 2020-06-18 22:24:17 +03:00 · 13f8c588de
commit 13f8c588de
parent 5ce12d8d2b fe24c715ca
1239 changed files with 25567 additions and 14987 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -157,6 +157,14 @@
 [submodule "contrib/openldap"]
 	path = contrib/openldap
 	url = https://github.com/openldap/openldap.git
+[submodule "contrib/cassandra"]
+	path = contrib/cassandra
+	url = https://github.com/ClickHouse-Extras/cpp-driver.git
+	branch = clickhouse
+[submodule "contrib/libuv"]
+	path = contrib/libuv
+	url = https://github.com/ClickHouse-Extras/libuv.git
+	branch = clickhouse
 [submodule "contrib/fmtlib"]
 	path = contrib/fmtlib
 	url = https://github.com/fmtlib/fmt.git
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -327,20 +327,16 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE

 include (GNUInstallDirs)
 include (cmake/contrib_finder.cmake)
-include (cmake/lib_name.cmake)

 find_contrib_lib(double-conversion) # Must be before parquet
 include (cmake/find/ssl.cmake)
 include (cmake/find/ldap.cmake) # after ssl
 include (cmake/find/icu.cmake)
-include (cmake/find/boost.cmake)
 include (cmake/find/zlib.cmake)
 include (cmake/find/zstd.cmake)
 include (cmake/find/ltdl.cmake) # for odbc
 include (cmake/find/termcap.cmake)
 # openssl, zlib before poco
-include (cmake/find/lz4.cmake)
-include (cmake/find/xxhash.cmake)
 include (cmake/find/sparsehash.cmake)
 include (cmake/find/re2.cmake)
 include (cmake/find/libgsasl.cmake)
@ -358,17 +354,16 @@ include (cmake/find/hdfs3.cmake) # uses protobuf
 include (cmake/find/s3.cmake)
 include (cmake/find/base64.cmake)
 include (cmake/find/parquet.cmake)
-include (cmake/find/hyperscan.cmake)
 include (cmake/find/simdjson.cmake)
 include (cmake/find/rapidjson.cmake)
 include (cmake/find/fastops.cmake)
 include (cmake/find/orc.cmake)
 include (cmake/find/avro.cmake)
 include (cmake/find/msgpack.cmake)
+include (cmake/find/cassandra.cmake)

 find_contrib_lib(cityhash)
 find_contrib_lib(farmhash)
-find_contrib_lib(metrohash)
 find_contrib_lib(btrie)

 if (ENABLE_TESTS)
--- a/README.md
+++ b/README.md
@ -10,10 +10,12 @@ ClickHouse is an open-source column-oriented database management system that all
 * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
 * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow you to chat with ClickHouse users in real time.
 * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
+* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
 * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
 * You can also [fill this form](https://clickhouse.tech/#meet) to meet the Yandex ClickHouse team in person.

 ## Upcoming Events

+* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020.
 * [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
 * [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.
--- a/base/common/CMakeLists.txt
+++ b/base/common/CMakeLists.txt
@ -16,6 +16,7 @@ set (SRCS
    shift10.cpp
    sleep.cpp
    terminalColors.cpp
+    errnoToString.cpp
 )

 if (ENABLE_REPLXX)
@ -43,10 +44,6 @@ endif()

 target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..)

-if (NOT USE_INTERNAL_BOOST_LIBRARY)
-    target_include_directories (common SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
-endif ()
-
 # Allow explicit fallback to readline
 if (NOT ENABLE_REPLXX AND ENABLE_READLINE)
    message (STATUS "Attempt to fallback to readline explicitly")
@ -72,7 +69,8 @@ endif ()
 target_link_libraries (common
    PUBLIC
        ${CITYHASH_LIBRARIES}
-        ${Boost_SYSTEM_LIBRARY}
+        boost::headers_only
+        boost::system
        FastMemcpy
        Poco::Net
        Poco::Net::SSL
--- a/base/common/ReplxxLineReader.cpp
+++ b/base/common/ReplxxLineReader.cpp
@ -1,9 +1,11 @@
 #include <common/ReplxxLineReader.h>
+#include <common/errnoToString.h>

 #include <errno.h>
 #include <string.h>
 #include <unistd.h>
 #include <functional>
+#include <sys/file.h>

 namespace
 {
@ -17,14 +19,41 @@ void trim(String & s)
 }

 ReplxxLineReader::ReplxxLineReader(
-    const Suggest & suggest, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
-    : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_))
+    const Suggest & suggest,
+    const String & history_file_path_,
+    bool multiline_,
+    Patterns extenders_,
+    Patterns delimiters_,
+    replxx::Replxx::highlighter_callback_t highlighter_)
+    : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
 {
    using namespace std::placeholders;
    using Replxx = replxx::Replxx;

    if (!history_file_path.empty())
-        rx.history_load(history_file_path);
+    {
+        history_file_fd = open(history_file_path.c_str(), O_RDWR);
+        if (history_file_fd < 0)
+        {
+            rx.print("Open of history file failed: %s\n", errnoToString(errno).c_str());
+        }
+        else
+        {
+            if (flock(history_file_fd, LOCK_SH))
+            {
+                rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str());
+            }
+            else
+            {
+                rx.history_load(history_file_path);
+
+                if (flock(history_file_fd, LOCK_UN))
+                {
+                    rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
+                }
+            }
+        }
+    }

    auto callback = [&suggest] (const String & context, size_t context_size)
    {
@ -36,6 +65,9 @@ ReplxxLineReader::ReplxxLineReader(
    rx.set_complete_on_empty(false);
    rx.set_word_break_characters(word_break_characters);

+    if (highlighter)
+        rx.set_highlighter_callback(highlighter);
+
    /// By default C-p/C-n are bound to COMPLETE_NEXT/COMPLETE_PREV,
    /// bind C-p/C-n to history-previous/history-next like readline.
    rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
@ -49,8 +81,8 @@ ReplxxLineReader::ReplxxLineReader(

 ReplxxLineReader::~ReplxxLineReader()
 {
-    if (!history_file_path.empty())
-        rx.history_save(history_file_path);
+    if (history_file_fd >= 0 && close(history_file_fd)) /// fd is still -1 if the history file was never opened
+        rx.print("Close of history file failed: %s\n", strerror(errno));
 }

 LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
@ -68,7 +100,25 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)

 void ReplxxLineReader::addToHistory(const String & line)
 {
+    /// Take an exclusive lock so that concurrent clients sharing the history file do not corrupt it.
+    /// history_file_fd is still -1 when no history file was configured or open() failed: nothing to lock then.
+    bool locked = false;
+    if (history_file_fd >= 0)
+    {
+        if (flock(history_file_fd, LOCK_EX))
+            rx.print("Lock of history file failed: %s\n", strerror(errno));
+        else
+            locked = true;
+    }
+
     rx.history_add(line);
+
+    /// Flush changes to the disk right away; skipped when no history file is configured.
+    if (!history_file_path.empty())
+        rx.history_save(history_file_path);
+
+    if (locked && 0 != flock(history_file_fd, LOCK_UN))
+        rx.print("Unlock of history file failed: %s\n", strerror(errno));
 }

 void ReplxxLineReader::enableBracketedPaste()
--- a/base/common/ReplxxLineReader.h
+++ b/base/common/ReplxxLineReader.h
@ -4,10 +4,17 @@

 #include <replxx.hxx>

+
 class ReplxxLineReader : public LineReader
 {
 public:
-    ReplxxLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_);
+    ReplxxLineReader(
+        const Suggest & suggest,
+        const String & history_file_path,
+        bool multiline,
+        Patterns extenders_,
+        Patterns delimiters_,
+        replxx::Replxx::highlighter_callback_t highlighter_);
    ~ReplxxLineReader() override;

    void enableBracketedPaste() override;
@ -17,4 +24,8 @@ private:
    void addToHistory(const String & line) override;

    replxx::Replxx rx;
+    replxx::Replxx::highlighter_callback_t highlighter;
+
+    // used to call flock() to synchronize multiple clients using same history file
+    int history_file_fd = -1;
 };
--- a/base/common/errnoToString.cpp
+++ b/base/common/errnoToString.cpp
@ -0,0 +1,36 @@
+#include "errnoToString.h"
+
+#include <cstring>
+
+#include <fmt/format.h>
+
+
+/// Renders an errno value as "errno: <the_errno>, strerror: <description>".
+/// `code` only feeds the "Unknown error <code>" fallback text of the non-GNU branch.
+std::string errnoToString(int code, int the_errno)
+{
+    const size_t buf_size = 128;
+    char buf[buf_size];
+#ifndef _GNU_SOURCE
+    /// This branch expects the int-returning strerror_r(), which writes the description into buf.
+    int rc = strerror_r(the_errno, buf, buf_size);
+#ifdef __APPLE__
+    /// NOTE(review): EINVAL is tolerated on macOS, presumably because buf is still filled in that case - confirm.
+    if (rc != 0 && rc != EINVAL)
+#else
+    if (rc != 0)
+#endif
+    {
+        std::string tmp = std::to_string(code);
+        const char * code_str = tmp.c_str();
+        const char * unknown_message = "Unknown error ";
+        strcpy(buf, unknown_message);
+        strcpy(buf + strlen(unknown_message), code_str);
+    }
+    return fmt::format("errno: {}, strerror: {}", the_errno, buf);
+#else
+    /// This branch expects the char*-returning strerror_r(); its result is formatted directly and may not point at buf.
+    (void)code;
+    return fmt::format("errno: {}, strerror: {}", the_errno, strerror_r(the_errno, buf, sizeof(buf)));
+#endif
+}
--- a/base/common/errnoToString.h
+++ b/base/common/errnoToString.h
@ -0,0 +1,6 @@
+#pragma once
+
+#include <cerrno>
+#include <string>
+
+std::string errnoToString(int code, int the_errno = errno);
--- a/base/common/strong_typedef.h
+++ b/base/common/strong_typedef.h
@ -1,6 +1,8 @@
 #pragma once

+#include <functional>
 #include <type_traits>
+#include <utility>

 template <class T, class Tag>
 struct StrongTypedef
--- a/base/common/ya.make
+++ b/base/common/ya.make
@ -47,6 +47,7 @@ SRCS(
    shift10.cpp
    sleep.cpp
    terminalColors.cpp
+    errnoToString.cpp
 )

 END()
--- a/base/mysqlxx/CMakeLists.txt
+++ b/base/mysqlxx/CMakeLists.txt
@ -32,10 +32,18 @@ else ()
    endif ()
 endif ()

-target_link_libraries(mysqlxx PUBLIC common PRIVATE ${MYSQLCLIENT_LIBRARIES} PUBLIC ${Boost_SYSTEM_LIBRARY} PRIVATE ${ZLIB_LIBRARIES})
+target_link_libraries (mysqlxx
+    PUBLIC
+        common
+    PRIVATE
+        ${MYSQLCLIENT_LIBRARIES}
+        ${ZLIB_LIBRARIES}
+)
+
 if(OPENSSL_LIBRARIES)
    target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES})
 endif()
+
 target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES})

 if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR)
--- a/cmake/Modules/Findmetrohash.cmake
+++ b/cmake/Modules/Findmetrohash.cmake
@ -1,44 +0,0 @@
-# - Try to find metrohash headers and libraries.
-#
-# Usage of this module as follows:
-#
-#     find_package(metrohash)
-#
-# Variables used by this module, they can change the default behaviour and need
-# to be set before calling find_package:
-#
-#  METROHASH_ROOT_DIR Set this variable to the root installation of
-#                    metrohash if the module has problems finding
-#                    the proper installation path.
-#
-# Variables defined by this module:
-#
-#  METROHASH_FOUND             System has metrohash libs/headers
-#  METROHASH_LIBRARIES         The metrohash library/libraries
-#  METROHASH_INCLUDE_DIR       The location of metrohash headers
-
-find_path(METROHASH_ROOT_DIR
-    NAMES include/metrohash.h
-)
-
-find_library(METROHASH_LIBRARIES
-    NAMES metrohash
-    PATHS ${METROHASH_ROOT_DIR}/lib ${METROHASH_LIBRARIES_PATHS}
-)
-
-find_path(METROHASH_INCLUDE_DIR
-    NAMES metrohash.h
-    PATHS ${METROHASH_ROOT_DIR}/include PATH_SUFFIXES metrohash ${METROHASH_INCLUDE_PATHS}
-)
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(metrohash DEFAULT_MSG
-    METROHASH_LIBRARIES
-    METROHASH_INCLUDE_DIR
-)
-
-mark_as_advanced(
-    METROHASH_ROOT_DIR
-    METROHASH_LIBRARIES
-    METROHASH_INCLUDE_DIR
-)
--- a/cmake/find/boost.cmake
+++ b/cmake/find/boost.cmake
@ -1,52 +0,0 @@
-option (USE_INTERNAL_BOOST_LIBRARY "Set to FALSE to use system boost library instead of bundled" ${NOT_UNBUNDLED})
-
-# Test random file existing in all package variants
-if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/system/src/error_code.cpp")
-    if(USE_INTERNAL_BOOST_LIBRARY)
-        message(WARNING "submodules in contrib/boost is missing. to fix try run: \n git submodule update --init --recursive")
-    endif()
-    set (USE_INTERNAL_BOOST_LIBRARY 0)
-    set (MISSING_INTERNAL_BOOST_LIBRARY 1)
-endif ()
-
-if (NOT USE_INTERNAL_BOOST_LIBRARY)
-    set (Boost_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES})
-    set (BOOST_ROOT "/usr/local")
-    find_package (Boost 1.60 COMPONENTS program_options system filesystem thread regex)
-    # incomplete, no include search, who use it?
-    if (NOT Boost_FOUND)
-        #    # Try to find manually.
-        #    set (BOOST_PATHS "")
-        #    find_library (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options PATHS ${BOOST_PATHS})
-        #    find_library (Boost_SYSTEM_LIBRARY boost_system PATHS ${BOOST_PATHS})
-        #    find_library (Boost_FILESYSTEM_LIBRARY boost_filesystem PATHS ${BOOST_PATHS})
-        # maybe found but incorrect version.
-        set (Boost_INCLUDE_DIRS "")
-        set (Boost_SYSTEM_LIBRARY "")
-    endif ()
-endif ()
-
-if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY)
-    set (USE_INTERNAL_BOOST_LIBRARY 1)
-    set (Boost_SYSTEM_LIBRARY boost_system_internal)
-    set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal)
-    set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY})
-    set (Boost_IOSTREAMS_LIBRARY boost_iostreams_internal)
-    set (Boost_REGEX_LIBRARY boost_regex_internal)
-
-    set (Boost_INCLUDE_DIRS)
-
-    set (BOOST_ROOT "${ClickHouse_SOURCE_DIR}/contrib/boost")
-
-    # For boost from github:
-    file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/*/include")
-    list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_})
-    # numeric has additional level
-    file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/numeric/*/include")
-    list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_})
-
-    # For packaged version:
-    list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost")
-endif ()
-
-message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_IOSTREAMS_LIBRARY},${Boost_REGEX_LIBRARY}")
--- a/cmake/find/cassandra.cmake
+++ b/cmake/find/cassandra.cmake
@ -0,0 +1,33 @@
+# Locates the bundled Cassandra driver (contrib/cassandra) and its libuv dependency (contrib/libuv).
+#
+# Sets USE_CASSANDRA, CASSANDRA_INCLUDE_DIR, CASSANDRA_LIBRARY, CASS_ROOT_DIR,
+# LIBUV_ROOT_DIR and LIBUV_LIBRARY when both submodules are checked out.
+option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES})
+
+if (ENABLE_CASSANDRA)
+    # Probe for CMakeLists.txt rather than the directory: an uninitialized git submodule
+    # still leaves an empty directory behind, and contrib/CMakeLists.txt calls
+    # add_subdirectory() on both paths.
+    if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv/CMakeLists.txt")
+        # WARNING, not ERROR: "ERROR" is not a message() mode and would be printed as message text.
+        message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive")
+    elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra/CMakeLists.txt")
+        message (WARNING "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive")
+    else()
+        set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv")
+        set (CASSANDRA_INCLUDE_DIR
+                "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/")
+        if (USE_STATIC_LIBRARIES)
+            set (LIBUV_LIBRARY uv_a)
+            set (CASSANDRA_LIBRARY cassandra_static)
+        else()
+            set (LIBUV_LIBRARY uv)
+            set (CASSANDRA_LIBRARY cassandra)
+        endif()
+        set (USE_CASSANDRA 1)
+        set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra")
+    endif()
+endif()
+
+message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}")
+message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}")
--- a/cmake/find/hyperscan.cmake
+++ b/cmake/find/hyperscan.cmake
@ -1,33 +0,0 @@
-if (HAVE_SSSE3)
-    option (ENABLE_HYPERSCAN "Enable hyperscan" ${ENABLE_LIBRARIES})
-endif ()
-
-if (ENABLE_HYPERSCAN)
-
-option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED})
-
-if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt")
-    if (USE_INTERNAL_HYPERSCAN_LIBRARY)
-        message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive")
-    endif ()
-   set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1)
-   set (USE_INTERNAL_HYPERSCAN_LIBRARY 0)
-endif ()
-
-if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
-    find_library (HYPERSCAN_LIBRARY hs)
-    find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
-endif ()
-
-if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR)
-    set (USE_HYPERSCAN 1)
-elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY)
-    set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src)
-    set (HYPERSCAN_LIBRARY hs)
-    set (USE_HYPERSCAN 1)
-    set (USE_INTERNAL_HYPERSCAN_LIBRARY 1)
-endif()
-
-message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}")
-
-endif ()
--- a/cmake/find/lz4.cmake
+++ b/cmake/find/lz4.cmake
@ -1,23 +0,0 @@
-option (USE_INTERNAL_LZ4_LIBRARY "Set to FALSE to use system lz4 library instead of bundled" ${NOT_UNBUNDLED})
-
-if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lz4/lib/lz4.h")
-    if (USE_INTERNAL_LZ4_LIBRARY)
-       message (WARNING "submodule contrib/lz4 is missing. to fix try run: \n git submodule update --init --recursive")
-       set (USE_INTERNAL_LZ4_LIBRARY 0)
-    endif ()
-    set (MISSING_INTERNAL_LZ4_LIBRARY 1)
-endif ()
-
-if (NOT USE_INTERNAL_LZ4_LIBRARY)
-    find_library (LZ4_LIBRARY lz4)
-    find_path (LZ4_INCLUDE_DIR NAMES lz4.h PATHS ${LZ4_INCLUDE_PATHS})
-endif ()
-
-if (LZ4_LIBRARY AND LZ4_INCLUDE_DIR)
-elseif (NOT MISSING_INTERNAL_LZ4_LIBRARY)
-    set (LZ4_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
-    set (USE_INTERNAL_LZ4_LIBRARY 1)
-    set (LZ4_LIBRARY lz4)
-endif ()
-
-message (STATUS "Using lz4: ${LZ4_INCLUDE_DIR} : ${LZ4_LIBRARY}")
--- a/cmake/find/parquet.cmake
+++ b/cmake/find/parquet.cmake
@ -63,7 +63,7 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
        set(ARROW_LIBRARY arrow_shared)
        set(PARQUET_LIBRARY parquet_shared)
        if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
-            list(APPEND PARQUET_LIBRARY ${Boost_REGEX_LIBRARY})
+            list(APPEND PARQUET_LIBRARY boost::regex)
        endif()
        set(THRIFT_LIBRARY thrift)
    endif()
--- a/cmake/find/xxhash.cmake
+++ b/cmake/find/xxhash.cmake
@ -1,22 +0,0 @@
-option (USE_INTERNAL_XXHASH_LIBRARY "Set to FALSE to use system xxHash library instead of bundled" ${NOT_UNBUNDLED})
-
-if (USE_INTERNAL_XXHASH_LIBRARY AND NOT USE_INTERNAL_LZ4_LIBRARY)
-    message (WARNING "can not use internal xxhash without internal lz4")
-    set (USE_INTERNAL_XXHASH_LIBRARY 0)
-endif ()
-
-if (USE_INTERNAL_XXHASH_LIBRARY)
-    set (XXHASH_LIBRARY lz4)
-    set (XXHASH_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
-else ()
-    find_library (XXHASH_LIBRARY xxhash)
-    find_path (XXHASH_INCLUDE_DIR NAMES xxhash.h PATHS ${XXHASH_INCLUDE_PATHS})
-endif ()
-
-if (XXHASH_LIBRARY AND XXHASH_INCLUDE_DIR)
-    set (USE_XXHASH 1)
-else ()
-    set (USE_XXHASH 0)
-endif ()
-
-message (STATUS "Using xxhash=${USE_XXHASH}: ${XXHASH_INCLUDE_DIR} : ${XXHASH_LIBRARY}")
--- a/cmake/lib_name.cmake
+++ b/cmake/lib_name.cmake
@ -1,4 +0,0 @@
-set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide)
-set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src)
-set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion)
-set(METROHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
--- a/cmake/print_include_directories.cmake
+++ b/cmake/print_include_directories.cmake
@ -21,11 +21,6 @@ if (TARGET double-conversion)
    list(APPEND dirs ${dirs1})
 endif ()

-if (TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY})
-    get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES)
-    list(APPEND dirs ${dirs1})
-endif ()
-
 list(REMOVE_DUPLICATES dirs)
 file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "")
 foreach (dir ${dirs})
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -16,13 +16,18 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")

 set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)

+add_subdirectory (boost-cmake)
 add_subdirectory (cctz-cmake)
 add_subdirectory (consistent-hashing-sumbur)
 add_subdirectory (consistent-hashing)
 add_subdirectory (croaring)
 add_subdirectory (FastMemcpy)
+add_subdirectory (hyperscan-cmake)
 add_subdirectory (jemalloc-cmake)
 add_subdirectory (libcpuid-cmake)
+add_subdirectory (libdivide)
+add_subdirectory (libmetrohash)
+add_subdirectory (lz4-cmake)
 add_subdirectory (murmurhash)
 add_subdirectory (replxx-cmake)
 add_subdirectory (ryu-cmake)
@ -33,14 +38,6 @@ add_subdirectory (poco-cmake)

 # TODO: refactor the contrib libraries below this comment.

-if (USE_INTERNAL_BOOST_LIBRARY)
-    add_subdirectory (boost-cmake)
-endif ()
-
-if (USE_INTERNAL_LZ4_LIBRARY)
-    add_subdirectory (lz4-cmake)
-endif ()
-
 if (USE_INTERNAL_ZSTD_LIBRARY)
    add_subdirectory (zstd-cmake)
 endif ()
@ -63,10 +60,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
    add_subdirectory (libfarmhash)
 endif ()

-if (USE_INTERNAL_METROHASH_LIBRARY)
-    add_subdirectory (libmetrohash)
-endif ()
-
 if (USE_INTERNAL_BTRIE_LIBRARY)
    add_subdirectory (libbtrie)
 endif ()
@ -294,18 +287,6 @@ if (USE_BASE64)
    add_subdirectory (base64-cmake)
 endif()

-if (USE_INTERNAL_HYPERSCAN_LIBRARY)
-    # The library is large - avoid bloat.
-    if (USE_STATIC_LIBRARIES)
-        add_subdirectory (hyperscan)
-        target_compile_options (hs PRIVATE -g0)
-    else ()
-        set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
-        add_subdirectory (hyperscan)
-        target_compile_options (hs_shared PRIVATE -g0)
-    endif ()
-endif()
-
 if (USE_SIMDJSON)
    add_subdirectory (simdjson-cmake)
 endif()
@ -314,4 +295,10 @@ if (USE_FASTOPS)
    add_subdirectory (fastops-cmake)
 endif()

+if (USE_CASSANDRA)
+    add_subdirectory (libuv)
+    add_subdirectory (cassandra)
+endif()
+
 add_subdirectory (fmtlib-cmake)
+
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@ -47,7 +47,8 @@ set(thriftcpp_threads_SOURCES
        )
 add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
 set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641
-target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS})
+target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src)
+target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only)


 # === orc
@ -146,7 +147,7 @@ add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES})
 add_dependencies(metadata_fbs flatc)

 # arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features.
-# Apple Clang compiler failed to compile this code without specifying c++11 standard. 
+# Apple Clang compiler failed to compile this code without specifying c++11 standard.
 # As result these compiler features detected as absent. In result it failed to compile orc itself.
 # In orc makefile there is code that sets flags, but arrow-cmake ignores these flags.
 if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
@ -286,10 +287,6 @@ set(ARROW_SRCS ${ARROW_SRCS}
        ${LIBRARY_DIR}/compute/kernels/util_internal.cc
        )

-if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY)
-    set(ARROW_WITH_LZ4 1)
-endif ()
-
 if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
    set(ARROW_WITH_SNAPPY 1)
 endif ()
@ -302,10 +299,8 @@ if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY)
    set(ARROW_WITH_ZSTD 1)
 endif ()

-if (ARROW_WITH_LZ4)
-    add_definitions(-DARROW_WITH_LZ4)
-    SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS})
-endif ()
+add_definitions(-DARROW_WITH_LZ4)
+SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS})

 if (ARROW_WITH_SNAPPY)
    add_definitions(-DARROW_WITH_SNAPPY)
@ -328,18 +323,15 @@ add_library(${ARROW_LIBRARY} ${ARROW_SRCS})
 # Arrow dependencies
 add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs)

-target_link_libraries(${ARROW_LIBRARY} PRIVATE boost_system_internal boost_filesystem_internal boost_regex_internal)
-target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY})
+target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem)

 if (USE_INTERNAL_PROTOBUF_LIBRARY)
    add_dependencies(${ARROW_LIBRARY} protoc)
 endif ()

-target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS})
+target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
 target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY})
-if (ARROW_WITH_LZ4)
-    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY})
-endif ()
+target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4)
 if (ARROW_WITH_SNAPPY)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY})
 endif ()
@ -396,8 +388,7 @@ list(APPEND PARQUET_SRCS
 add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
 target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
 include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h
-target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY})
-target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS})
+target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex)

 if (SANITIZE STREQUAL "undefined")
    target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined)
--- a/contrib/avro-cmake/CMakeLists.txt
+++ b/contrib/avro-cmake/CMakeLists.txt
@ -45,13 +45,12 @@ set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_V

 target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR})

-target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS})
-target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY})
+target_link_libraries (avrocpp PRIVATE boost::headers_only boost::iostreams)

 if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
    target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
    target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
-    target_link_libraries (avrocpp ${SNAPPY_LIBRARY})
+    target_link_libraries (avrocpp PRIVATE ${SNAPPY_LIBRARY})
 endif ()

 if (COMPILER_GCC)
@ -67,4 +66,4 @@ ADD_CUSTOM_TARGET(avro_symlink_headers ALL
    COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro
 )
-add_dependencies(avrocpp avro_symlink_headers)
+add_dependencies(avrocpp avro_symlink_headers)
--- a/contrib/aws
+++ b/contrib/aws
@ -1 +1 @@
-Subproject commit f7d9ce39f41323300044567be007c233338bb94a
+Subproject commit 17e10c0fc77f22afe890fa6d1b283760e5edaa56
--- a/contrib/boost-cmake/CMakeLists.txt
+++ b/contrib/boost-cmake/CMakeLists.txt
@ -1,45 +1,133 @@
-# Supported contrib/boost source variants:
-# 1. Default - Minimized vrsion from release archive : https://github.com/ClickHouse-Extras/boost
-# 2. Release archive unpacked to contrib/boost
-# 3. Full boost https://github.com/boostorg/boost
+option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ${NOT_UNBUNDLED})

-# if boostorg/boost connected as submodule: Update all boost internal submodules to tag:
-# git submodule foreach "git fetch --all && git checkout boost-1.66.0 || true"
+if (USE_INTERNAL_BOOST_LIBRARY)
+    set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)

-#
-# Important boost patch: 094c18b
-#
+    # filesystem

-include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
+    set (SRCS_FILESYSTEM
+        ${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp
+        ${LIBRARY_DIR}/libs/filesystem/src/operations.cpp
+        ${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp
+        ${LIBRARY_DIR}/libs/filesystem/src/path.cpp
+        ${LIBRARY_DIR}/libs/filesystem/src/portability.cpp
+        ${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp
+        ${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp
+        ${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp
+    )

-set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)
+    add_library (_boost_filesystem ${SRCS_FILESYSTEM})
+    add_library (boost::filesystem ALIAS _boost_filesystem)
+    target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})

-if(NOT MSVC)
-    add_definitions(-Wno-unused-variable -Wno-deprecated-declarations)
-endif()
+    # headers-only

-macro(add_boost_lib lib_name)
-    add_headers_and_sources(boost_${lib_name} ${LIBRARY_DIR}/libs/${lib_name}/src)
-    add_library(boost_${lib_name}_internal ${boost_${lib_name}_sources})
-    target_include_directories(boost_${lib_name}_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
-    target_compile_definitions(boost_${lib_name}_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
-endmacro()
+    add_library (_boost_headers_only INTERFACE)
+    add_library (boost::headers_only ALIAS _boost_headers_only)
+    target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR})

-add_boost_lib(system)
+    # iostreams

-add_boost_lib(program_options)
+    set (SRCS_IOSTREAMS
+        ${LIBRARY_DIR}/libs/iostreams/src/file_descriptor.cpp
+        ${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp
+        ${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp
+        ${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp
+    )

-add_boost_lib(filesystem)
-target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal)
+    add_library (_boost_iostreams ${SRCS_IOSTREAMS})
+    add_library (boost::iostreams ALIAS _boost_iostreams)
+    target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR})
+    target_link_libraries (_boost_iostreams PRIVATE zlib)

-#add_boost_lib(random)
+    # program_options

-if (USE_INTERNAL_PARQUET_LIBRARY)
-    add_boost_lib(regex)
-endif()
+    set (SRCS_PROGRAM_OPTIONS
+        ${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/config_file.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/convert.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/options_description.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/parsers.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/split.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp
+        ${LIBRARY_DIR}/libs/program_options/src/winmain.cpp
+    )

-if (USE_INTERNAL_AVRO_LIBRARY)
-    add_boost_lib(iostreams)
-    target_link_libraries(boost_iostreams_internal PUBLIC ${ZLIB_LIBRARIES})
-    target_include_directories(boost_iostreams_internal SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR})
-endif()
+    add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS})
+    add_library (boost::program_options ALIAS _boost_program_options)
+    target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
+
+    # regex
+
+    set (SRCS_REGEX
+        ${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp
+        ${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp
+        ${LIBRARY_DIR}/libs/regex/src/cregex.cpp
+        ${LIBRARY_DIR}/libs/regex/src/fileiter.cpp
+        ${LIBRARY_DIR}/libs/regex/src/icu.cpp
+        ${LIBRARY_DIR}/libs/regex/src/instances.cpp
+        ${LIBRARY_DIR}/libs/regex/src/internals.hpp
+        ${LIBRARY_DIR}/libs/regex/src/posix_api.cpp
+        ${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp
+        ${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp
+        ${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp
+        ${LIBRARY_DIR}/libs/regex/src/regex.cpp
+        ${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp
+        ${LIBRARY_DIR}/libs/regex/src/usinstances.cpp
+        ${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp
+        ${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp
+        ${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp
+        ${LIBRARY_DIR}/libs/regex/src/winstances.cpp
+    )
+
+    add_library (_boost_regex ${SRCS_REGEX})
+    add_library (boost::regex ALIAS _boost_regex)
+    target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR})
+
+    # system
+
+    set (SRCS_SYSTEM
+        ${LIBRARY_DIR}/libs/system/src/error_code.cpp
+    )
+
+    add_library (_boost_system ${SRCS_SYSTEM})
+    add_library (boost::system ALIAS _boost_system)
+    target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})
+else ()
+    # 1.70 like in contrib/boost
+    # 1.67 on CI
+    set(BOOST_VERSION 1.67)
+
+    find_package(Boost ${BOOST_VERSION} COMPONENTS
+        system
+        filesystem
+        iostreams
+        program_options
+        regex
+    REQUIRED)
+
+    add_library (_boost_headers_only INTERFACE)
+    add_library (boost::headers_only ALIAS _boost_headers_only)
+    target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${Boost_INCLUDE_DIR})
+
+    add_library (_boost_filesystem INTERFACE)
+    add_library (_boost_iostreams INTERFACE)
+    add_library (_boost_program_options INTERFACE)
+    add_library (_boost_regex INTERFACE)
+    add_library (_boost_system INTERFACE)
+
+    target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
+    target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
+    target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY})
+    target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY})
+    target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
+
+    add_library (boost::filesystem ALIAS _boost_filesystem)
+    add_library (boost::iostreams ALIAS _boost_iostreams)
+    add_library (boost::program_options ALIAS _boost_program_options)
+    add_library (boost::regex ALIAS _boost_regex)
+    add_library (boost::system ALIAS _boost_system)
+endif ()
--- a/contrib/cassandra
+++ b/contrib/cassandra
@ -0,0 +1 @@
+Subproject commit a49b4e0e2696a4b8ef286a5b9538d1cbe8490509
--- a/contrib/cppkafka-cmake/CMakeLists.txt
+++ b/contrib/cppkafka-cmake/CMakeLists.txt
@ -1,31 +1,33 @@
-set(CPPKAFKA_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka)
+set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka)

 set(SRCS
-  ${CPPKAFKA_DIR}/src/configuration.cpp
-  ${CPPKAFKA_DIR}/src/topic_configuration.cpp
-  ${CPPKAFKA_DIR}/src/configuration_option.cpp
-  ${CPPKAFKA_DIR}/src/exceptions.cpp
-  ${CPPKAFKA_DIR}/src/topic.cpp
-  ${CPPKAFKA_DIR}/src/buffer.cpp
-  ${CPPKAFKA_DIR}/src/queue.cpp
-  ${CPPKAFKA_DIR}/src/message.cpp
-  ${CPPKAFKA_DIR}/src/message_timestamp.cpp
-  ${CPPKAFKA_DIR}/src/message_internal.cpp
-  ${CPPKAFKA_DIR}/src/topic_partition.cpp
-  ${CPPKAFKA_DIR}/src/topic_partition_list.cpp
-  ${CPPKAFKA_DIR}/src/metadata.cpp
-  ${CPPKAFKA_DIR}/src/group_information.cpp
-  ${CPPKAFKA_DIR}/src/error.cpp
-  ${CPPKAFKA_DIR}/src/event.cpp
-
-  ${CPPKAFKA_DIR}/src/kafka_handle_base.cpp
-  ${CPPKAFKA_DIR}/src/producer.cpp
-  ${CPPKAFKA_DIR}/src/consumer.cpp
+    ${LIBRARY_DIR}/src/buffer.cpp
+    ${LIBRARY_DIR}/src/configuration_option.cpp
+    ${LIBRARY_DIR}/src/configuration.cpp
+    ${LIBRARY_DIR}/src/consumer.cpp
+    ${LIBRARY_DIR}/src/error.cpp
+    ${LIBRARY_DIR}/src/event.cpp
+    ${LIBRARY_DIR}/src/exceptions.cpp
+    ${LIBRARY_DIR}/src/group_information.cpp
+    ${LIBRARY_DIR}/src/kafka_handle_base.cpp
+    ${LIBRARY_DIR}/src/message_internal.cpp
+    ${LIBRARY_DIR}/src/message_timestamp.cpp
+    ${LIBRARY_DIR}/src/message.cpp
+    ${LIBRARY_DIR}/src/metadata.cpp
+    ${LIBRARY_DIR}/src/producer.cpp
+    ${LIBRARY_DIR}/src/queue.cpp
+    ${LIBRARY_DIR}/src/topic_configuration.cpp
+    ${LIBRARY_DIR}/src/topic_partition_list.cpp
+    ${LIBRARY_DIR}/src/topic_partition.cpp
+    ${LIBRARY_DIR}/src/topic.cpp
 )

 add_library(cppkafka ${SRCS})

-target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY})
-target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka)
-target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS})
-target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include)
+target_link_libraries(cppkafka
+    PRIVATE
+        ${RDKAFKA_LIBRARY}
+        boost::headers_only # Boost headers via the shared interface target instead of a raw Boost include path
+)
+target_include_directories(cppkafka PRIVATE ${LIBRARY_DIR}/include/cppkafka) # presumably lets the library's own sources include headers without the cppkafka/ prefix - TODO confirm
+target_include_directories(cppkafka SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) # public headers: exported to consumers as system includes, placed before other include paths
--- a/contrib/hyperscan
+++ b/contrib/hyperscan
@ -1 +1 @@
-Subproject commit 3058c9c20cba3accdf92544d8513a26240c4ff70
+Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531
--- a/contrib/hyperscan-cmake/CMakeLists.txt
+++ b/contrib/hyperscan-cmake/CMakeLists.txt
@ -0,0 +1,250 @@
+option (ENABLE_HYPERSCAN "Enable hyperscan library" ${ENABLE_LIBRARIES}) # default follows the ENABLE_LIBRARIES switch
+
+if (NOT HAVE_SSSE3) # no SSSE3 detected: hyperscan is switched off regardless of the option (presumably the library requires SSSE3 - confirm)
+    set (ENABLE_HYPERSCAN OFF)
+endif ()
+
+if (ENABLE_HYPERSCAN)
+    option (USE_INTERNAL_HYPERSCAN_LIBRARY "Use internal hyperscan library" ${NOT_UNBUNDLED})
+
+    if (USE_INTERNAL_HYPERSCAN_LIBRARY)
+        set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan)
+
+        set (SRCS
+            ${LIBRARY_DIR}/src/alloc.c
+            ${LIBRARY_DIR}/src/compiler/asserts.cpp
+            ${LIBRARY_DIR}/src/compiler/compiler.cpp
+            ${LIBRARY_DIR}/src/compiler/error.cpp
+            ${LIBRARY_DIR}/src/crc32.c
+            ${LIBRARY_DIR}/src/database.c
+            ${LIBRARY_DIR}/src/fdr/engine_description.cpp
+            ${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp
+            ${LIBRARY_DIR}/src/fdr/fdr_compile.cpp
+            ${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp
+            ${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp
+            ${LIBRARY_DIR}/src/fdr/fdr.c
+            ${LIBRARY_DIR}/src/fdr/flood_compile.cpp
+            ${LIBRARY_DIR}/src/fdr/teddy_compile.cpp
+            ${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp
+            ${LIBRARY_DIR}/src/fdr/teddy.c
+            ${LIBRARY_DIR}/src/grey.cpp
+            ${LIBRARY_DIR}/src/hs_valid_platform.c
+            ${LIBRARY_DIR}/src/hs_version.c
+            ${LIBRARY_DIR}/src/hs.cpp
+            ${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp
+            ${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp
+            ${LIBRARY_DIR}/src/hwlm/hwlm.c
+            ${LIBRARY_DIR}/src/hwlm/noodle_build.cpp
+            ${LIBRARY_DIR}/src/hwlm/noodle_engine.c
+            ${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp
+            ${LIBRARY_DIR}/src/nfa/accel.c
+            ${LIBRARY_DIR}/src/nfa/accelcompile.cpp
+            ${LIBRARY_DIR}/src/nfa/castle.c
+            ${LIBRARY_DIR}/src/nfa/castlecompile.cpp
+            ${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp
+            ${LIBRARY_DIR}/src/nfa/dfa_min.cpp
+            ${LIBRARY_DIR}/src/nfa/gough.c
+            ${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp
+            ${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp
+            ${LIBRARY_DIR}/src/nfa/goughcompile.cpp
+            ${LIBRARY_DIR}/src/nfa/lbr.c
+            ${LIBRARY_DIR}/src/nfa/limex_64.c
+            ${LIBRARY_DIR}/src/nfa/limex_accel.c
+            ${LIBRARY_DIR}/src/nfa/limex_compile.cpp
+            ${LIBRARY_DIR}/src/nfa/limex_native.c
+            ${LIBRARY_DIR}/src/nfa/limex_simd128.c
+            ${LIBRARY_DIR}/src/nfa/limex_simd256.c
+            ${LIBRARY_DIR}/src/nfa/limex_simd384.c
+            ${LIBRARY_DIR}/src/nfa/limex_simd512.c
+            ${LIBRARY_DIR}/src/nfa/mcclellan.c
+            ${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp
+            ${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp
+            ${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp
+            ${LIBRARY_DIR}/src/nfa/mcsheng_data.c
+            ${LIBRARY_DIR}/src/nfa/mcsheng.c
+            ${LIBRARY_DIR}/src/nfa/mpv.c
+            ${LIBRARY_DIR}/src/nfa/mpvcompile.cpp
+            ${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c
+            ${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp
+            ${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp
+            ${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp
+            ${LIBRARY_DIR}/src/nfa/rdfa.cpp
+            ${LIBRARY_DIR}/src/nfa/repeat.c
+            ${LIBRARY_DIR}/src/nfa/repeatcompile.cpp
+            ${LIBRARY_DIR}/src/nfa/sheng.c
+            ${LIBRARY_DIR}/src/nfa/shengcompile.cpp
+            ${LIBRARY_DIR}/src/nfa/shufti.c
+            ${LIBRARY_DIR}/src/nfa/shufticompile.cpp
+            ${LIBRARY_DIR}/src/nfa/tamarama.c
+            ${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp
+            ${LIBRARY_DIR}/src/nfa/truffle.c
+            ${LIBRARY_DIR}/src/nfa/trufflecompile.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_builder.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_calc_components.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_cyclic_redundancy.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_depth.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_dominators.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_edge_redundancy.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_equivalence.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_execute.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_expr_info.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_extparam.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_fixed_width.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_fuzzy.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_haig.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_mcclellan.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_misc_opt.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_netflow.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_prefilter.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_region.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_som.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_split.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_uncalc_components.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_utf8.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_util.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng_width.cpp
+            ${LIBRARY_DIR}/src/nfagraph/ng.cpp
+            ${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp
+            ${LIBRARY_DIR}/src/parser/buildstate.cpp
+            ${LIBRARY_DIR}/src/parser/check_refs.cpp
+            ${LIBRARY_DIR}/src/parser/Component.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentAtomicGroup.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentBackReference.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentBoundary.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentByte.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentClass.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentEUS.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentSequence.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp
+            ${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp
+            ${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp
+            ${LIBRARY_DIR}/src/parser/control_verbs.cpp
+            ${LIBRARY_DIR}/src/parser/logical_combination.cpp
+            ${LIBRARY_DIR}/src/parser/parse_error.cpp
+            ${LIBRARY_DIR}/src/parser/parser_util.cpp
+            ${LIBRARY_DIR}/src/parser/Parser.cpp
+            ${LIBRARY_DIR}/src/parser/prefilter.cpp
+            ${LIBRARY_DIR}/src/parser/shortcut_literal.cpp
+            ${LIBRARY_DIR}/src/parser/ucp_table.cpp
+            ${LIBRARY_DIR}/src/parser/unsupported.cpp
+            ${LIBRARY_DIR}/src/parser/utf8_validate.cpp
+            ${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp
+            ${LIBRARY_DIR}/src/rose/block.c
+            ${LIBRARY_DIR}/src/rose/catchup.c
+            ${LIBRARY_DIR}/src/rose/init.c
+            ${LIBRARY_DIR}/src/rose/match.c
+            ${LIBRARY_DIR}/src/rose/program_runtime.c
+            ${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_add.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_castle.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_compile.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_convert.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_dedupe.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_engine_blob.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_exclusive.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_groups.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_infix.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_instructions.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_lit_accel.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_long_lit.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_lookaround.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_merge.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_misc.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_program.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp
+            ${LIBRARY_DIR}/src/rose/rose_build_width.cpp
+            ${LIBRARY_DIR}/src/rose/rose_in_util.cpp
+            ${LIBRARY_DIR}/src/rose/stream.c
+            ${LIBRARY_DIR}/src/runtime.c
+            ${LIBRARY_DIR}/src/scratch.c
+            ${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp
+            ${LIBRARY_DIR}/src/som/slot_manager.cpp
+            ${LIBRARY_DIR}/src/som/som_runtime.c
+            ${LIBRARY_DIR}/src/som/som_stream.c
+            ${LIBRARY_DIR}/src/stream_compress.c
+            ${LIBRARY_DIR}/src/util/alloc.cpp
+            ${LIBRARY_DIR}/src/util/charreach.cpp
+            ${LIBRARY_DIR}/src/util/clique.cpp
+            ${LIBRARY_DIR}/src/util/compile_context.cpp
+            ${LIBRARY_DIR}/src/util/compile_error.cpp
+            ${LIBRARY_DIR}/src/util/cpuid_flags.c
+            ${LIBRARY_DIR}/src/util/depth.cpp
+            ${LIBRARY_DIR}/src/util/fatbit_build.cpp
+            ${LIBRARY_DIR}/src/util/multibit_build.cpp
+            ${LIBRARY_DIR}/src/util/multibit.c
+            ${LIBRARY_DIR}/src/util/report_manager.cpp
+            ${LIBRARY_DIR}/src/util/simd_utils.c
+            ${LIBRARY_DIR}/src/util/state_compress.c
+            ${LIBRARY_DIR}/src/util/target_info.cpp
+            ${LIBRARY_DIR}/src/util/ue2string.cpp
+        )
+
+        add_library (hyperscan ${SRCS})
+
+        target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) # PUBLIC: code linking hyperscan sees USE_HYPERSCAN=1 as well
+        target_compile_options (hyperscan
+            PRIVATE -g0 -march=corei7 # -g0 because the library has too much debug information; -march=corei7 pins the instruction-set baseline
+        )
+        target_include_directories (hyperscan
+            PRIVATE
+                common # directory next to this file holding the checked-in hs_version.h
+                ${LIBRARY_DIR}/include
+        )
+        target_include_directories (hyperscan SYSTEM PUBLIC ${LIBRARY_DIR}/src) # consumers get src/ as a system include path
+        if (ARCH_AMD64)
+            target_include_directories (hyperscan PRIVATE x86_64) # directory next to this file holding the checked-in config.h for x86_64
+        endif ()
+        target_link_libraries (hyperscan PRIVATE boost::headers_only) # only Boost headers are needed, no compiled Boost component
+    else ()
+        # Unbundled build: wrap the system libhs in an imported target carrying the same name and USE_HYPERSCAN=1 definition.
+        find_library (LIBRARY_HYPERSCAN NAMES hs)
+        find_path (INCLUDE_HYPERSCAN NAMES hs.h HINTS /usr/include/hs) # Ubuntu puts headers in this folder
+        add_library (hyperscan UNKNOWN IMPORTED GLOBAL)
+        set_target_properties (hyperscan PROPERTIES
+            IMPORTED_LOCATION "${LIBRARY_HYPERSCAN}" INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_HYPERSCAN}"
+            INTERFACE_COMPILE_DEFINITIONS USE_HYPERSCAN=1)
+    endif ()
+
+    message (STATUS "Using hyperscan")
+else ()
+    add_library (hyperscan INTERFACE) # empty stub: the `hyperscan` target exists in every configuration
+    target_compile_definitions (hyperscan INTERFACE USE_HYPERSCAN=0) # consumers compile with USE_HYPERSCAN=0 when the library is disabled
+
+    message (STATUS "Not using hyperscan")
+endif ()
--- a/contrib/hyperscan-cmake/common/hs_version.h
+++ b/contrib/hyperscan-cmake/common/hs_version.h
@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HS_VERSION_H_C6428FAF8E3713
+#define HS_VERSION_H_C6428FAF8E3713
+
+/**
+ * A version string to identify this release of Hyperscan: version number, then a date.
+ * NOTE(review): this header is checked in rather than generated; the date looks like a placeholder - confirm. */
+#define HS_VERSION_STRING "5.1.1 2000-01-01"
+/* The same 5.1.1 version packed into one integer: major << 24 | minor << 16 | patch << 8. */
+#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (1 << 8) | 0)
+
+#endif /* HS_VERSION_H_C6428FAF8E3713 */
+
--- a/contrib/hyperscan-cmake/x86_64/config.h
+++ b/contrib/hyperscan-cmake/x86_64/config.h
@ -0,0 +1,106 @@
+/* used by cmake */
+
+#ifndef CONFIG_H_
+#define CONFIG_H_
+
+/* "Define if the build is 32 bit" */
+/* #undef ARCH_32_BIT */
+
+/* "Define if the build is 64 bit" */
+#define ARCH_64_BIT
+
+/* "Define if building for IA32" */
+/* #undef ARCH_IA32 */
+
+/* "Define if building for EM64T" */
+#define ARCH_X86_64
+
+/* internal build, switch on dump support. */
+/* #undef DUMP_SUPPORT */
+
+/* Define if building "fat" runtime. */
+/* #undef FAT_RUNTIME */
+
+/* Define if building AVX-512 in the fat runtime. */
+/* #undef BUILD_AVX512 */
+
+/* Define to 1 if `backtrace' works. */
+#define HAVE_BACKTRACE
+
+/* C compiler has __builtin_assume_aligned */
+#define HAVE_CC_BUILTIN_ASSUME_ALIGNED
+
+/* C++ compiler has __builtin_assume_aligned */
+#define HAVE_CXX_BUILTIN_ASSUME_ALIGNED
+
+/* C++ compiler has x86intrin.h */
+#define HAVE_CXX_X86INTRIN_H
+
+/* C compiler has x86intrin.h */
+#define HAVE_C_X86INTRIN_H
+
+/* C++ compiler has intrin.h */
+/* #undef HAVE_CXX_INTRIN_H */
+
+/* C compiler has intrin.h */
+/* #undef HAVE_C_INTRIN_H */
+
+/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
+   0 if you don't. */
+/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */
+
+/* #undef HAVE_PTHREAD_NP_H */
+
+/* Define to 1 if you have the `malloc_info' function. */
+/* #undef HAVE_MALLOC_INFO */
+
+/* Define to 1 if you have the `memmem' function. */
+/* #undef HAVE_MEMMEM */
+
+/* Define to 1 if you have a working `mmap' system call. */
+#define HAVE_MMAP
+
+/* Define to 1 if `posix_memalign' works. */
+#define HAVE_POSIX_MEMALIGN
+
+/* Define to 1 if you have the `setrlimit' function. */
+#define HAVE_SETRLIMIT
+
+/* Define to 1 if you have the `shmget' function. */
+/* #undef HAVE_SHMGET */
+
+/* Define to 1 if you have the `sigaction' function. */
+#define HAVE_SIGACTION
+
+/* Define to 1 if you have the `sigaltstack' function. */
+#define HAVE_SIGALTSTACK
+
+/* Define if the sqlite3_open_v2 call is available */
+/* #undef HAVE_SQLITE3_OPEN_V2 */
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H
+
+/* Define to 1 if you have the `_aligned_malloc' function. */
+/* #undef HAVE__ALIGNED_MALLOC */
+
+/* Define if compiler has __builtin_constant_p */
+#define HAVE__BUILTIN_CONSTANT_P
+
+/* Optimize, inline critical functions */
+#define HS_OPTIMIZE
+
+#define HS_VERSION /* the version macros below are defined with no value in this checked-in config */
+#define HS_MAJOR_VERSION
+#define HS_MINOR_VERSION
+#define HS_PATCH_VERSION
+/* HS_VERSION_STRING and HS_VERSION_32BIT are defined separately, in common/hs_version.h. */
+#define BUILD_DATE /* also empty - NOTE(review): confirm nothing expands it where a value is required */
+
+/* define if this is a release build. */
+#define RELEASE_BUILD
+
+/* define if reverse_graph requires patch for boost 1.62.0 */
+/* #undef BOOST_REVGRAPH_PATCH */
+
+#endif /* CONFIG_H_ */
--- a/contrib/libdivide/CMakeLists.txt
+++ b/contrib/libdivide/CMakeLists.txt
@ -0,0 +1,2 @@
+add_library (libdivide INTERFACE) # interface-only target: nothing is compiled, consumers just inherit the include path below
+target_include_directories (libdivide SYSTEM BEFORE INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
--- a/contrib/libhdfs3-cmake/CMakeLists.txt
+++ b/contrib/libhdfs3-cmake/CMakeLists.txt
@ -209,9 +209,8 @@ endif()
 target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARY})

 # inherit from parent cmake
-target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS})
 target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR})
-target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY})
+target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY} boost::headers_only)
 if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES)
    target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR})
    target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES})
--- a/contrib/libmetrohash/CMakeLists.txt
+++ b/contrib/libmetrohash/CMakeLists.txt
@ -1,13 +1,10 @@
-if (HAVE_SSE42) # Not used. Pretty easy to port.
-    set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp src/metrohash128crc.h)
-endif ()
-
-add_library(metrohash
-    src/metrohash.h
-    src/testvector.h
-
+set (SRCS
    src/metrohash64.cpp
    src/metrohash128.cpp
-    ${SOURCES_SSE42_ONLY})
+)
+if (HAVE_SSE42) # Not used. Pretty easy to port.
+    list (APPEND SRCS src/metrohash128crc.cpp)
+endif ()

-target_include_directories(metrohash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
+add_library(metrohash ${SRCS}) # SRCS includes metrohash128crc.cpp only when HAVE_SSE42 is set
+target_include_directories(metrohash PUBLIC src) # relative path: resolved against this directory, i.e. its src/ subfolder
--- a/contrib/librdkafka-cmake/CMakeLists.txt
+++ b/contrib/librdkafka-cmake/CMakeLists.txt
@ -82,7 +82,7 @@ target_compile_options(rdkafka PRIVATE -fno-sanitize=undefined)
 target_include_directories(rdkafka SYSTEM PUBLIC include)
 target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR})         # Because weird logic with "include_next" is used.
 target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common)   # Because wrong path to "zstd_errors.h" is used.
-target_link_libraries(rdkafka PRIVATE ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${LIBGSASL_LIBRARY})
+target_link_libraries(rdkafka PRIVATE lz4 ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LIBGSASL_LIBRARY})
 if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY)
    target_link_libraries(rdkafka PRIVATE ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY})
 endif()
--- a/contrib/libuv
+++ b/contrib/libuv
@ -0,0 +1 @@
+Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28
--- a/contrib/lz4-cmake/CMakeLists.txt
+++ b/contrib/lz4-cmake/CMakeLists.txt
@ -1,17 +1,28 @@
-SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
+option (USE_INTERNAL_LZ4_LIBRARY "Use internal lz4 library" ${NOT_UNBUNDLED})

-add_library (lz4
-    ${LIBRARY_DIR}/lz4.c
-    ${LIBRARY_DIR}/lz4hc.c
-    ${LIBRARY_DIR}/lz4frame.c
-    ${LIBRARY_DIR}/lz4frame.h
-    ${LIBRARY_DIR}/xxhash.c
-    ${LIBRARY_DIR}/xxhash.h
+if (USE_INTERNAL_LZ4_LIBRARY)
+    set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4)

-    ${LIBRARY_DIR}/lz4.h
-    ${LIBRARY_DIR}/lz4hc.h)
+    set (SRCS
+        ${LIBRARY_DIR}/lib/lz4.c
+        ${LIBRARY_DIR}/lib/lz4hc.c
+        ${LIBRARY_DIR}/lib/lz4frame.c
+        ${LIBRARY_DIR}/lib/xxhash.c
+    )

-target_compile_definitions(lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1)
-target_compile_options(lz4 PRIVATE -fno-sanitize=undefined)
+    add_library (lz4 ${SRCS})

-target_include_directories(lz4 PUBLIC ${LIBRARY_DIR})
+    target_compile_definitions (lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1 USE_XXHASH=1)
+    if (SANITIZE STREQUAL "undefined")
+        target_compile_options (lz4 PRIVATE -fno-sanitize=undefined)
+    endif ()
+    target_include_directories(lz4 PUBLIC ${LIBRARY_DIR}/lib)
+else ()
+    find_library (LIBRARY_LZ4 lz4) # unbundled build: use the system liblz4 and its lz4.h
+    find_path (INCLUDE_LZ4 lz4.h)
+    # GLOBAL makes the imported target visible outside this directory; other contrib directories (e.g. rdkafka) link `lz4` by target name.
+    add_library (lz4 UNKNOWN IMPORTED GLOBAL)
+    set_property (TARGET lz4 PROPERTY IMPORTED_LOCATION "${LIBRARY_LZ4}")
+    set_property (TARGET lz4 PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_LZ4}")
+    set_property (TARGET lz4 APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_XXHASH=0) # the bundled branch exports USE_XXHASH=1 instead
+endif ()
--- a/contrib/replxx
+++ b/contrib/replxx
@ -1 +1 @@
-Subproject commit f1332626639d6492eaf170758642da14fbbda7bf
+Subproject commit 2d37daaad24be71e76514a36b0a47120be2f9086
--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@ -1,6 +1,6 @@
 FROM ubuntu:18.04

-ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
+ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
 ARG version=20.5.1.*

 RUN apt-get update \
--- a/docker/packager/deb/Dockerfile
+++ b/docker/packager/deb/Dockerfile
@ -54,6 +54,8 @@ RUN apt-get --allow-unauthenticated update -y \
            libboost-system-dev \
            libboost-filesystem-dev \
            libboost-thread-dev \
+            libboost-iostreams-dev \
+            libboost-regex-dev \
            zlib1g-dev \
            liblz4-dev \
            libdouble-conversion-dev \
@ -82,8 +84,8 @@ RUN apt-get --allow-unauthenticated update -y \
            libcctz-dev \
            libldap2-dev \
            libsasl2-dev \
-            heimdal-multidev
-
+            heimdal-multidev \
+            libhyperscan-dev


 # This symlink required by gcc to find lld compiler
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@ -1,6 +1,6 @@
 FROM ubuntu:18.04

-ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
+ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
 ARG version=20.5.1.*
 ARG gosu_ver=1.10

--- a/docker/server/entrypoint.sh
+++ b/docker/server/entrypoint.sh
@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then

    # check if clickhouse is ready to accept connections
    # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay)
-    if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
+    if ! wget --spider --quiet --prefer-family=IPv6 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
        echo >&2 'ClickHouse init process failed.'
        exit 1
    fi
@ -110,7 +110,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
    # create default database, if defined
    if [ -n "$CLICKHOUSE_DB" ]; then
        echo "$0: create database '$CLICKHOUSE_DB'"
-        "${clickhouseclient[@]}" "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
+        "${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
    fi

    for f in /docker-entrypoint-initdb.d/*; do
--- a/docker/test/Dockerfile
+++ b/docker/test/Dockerfile
@ -1,6 +1,6 @@
 FROM ubuntu:18.04

-ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
+ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
 ARG version=20.5.1.*

 RUN apt-get update && \
--- a/docker/test/integration/compose/docker_compose_cassandra.yml
+++ b/docker/test/integration/compose/docker_compose_cassandra.yml
@ -0,0 +1,7 @@
+version: "2.3"
+services:
+    cassandra1: # Cassandra service used by the integration test compose setup
+        image: cassandra
+        restart: always
+        ports:
+          - "9043:9042" # host port 9043 -> container port 9042
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -198,12 +198,14 @@ function get_profiles
    clickhouse-client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
    clickhouse-client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
    clickhouse-client --port 9001 --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
+    clickhouse-client --port 9001 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &

    clickhouse-client --port 9002 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
    clickhouse-client --port 9002 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
    clickhouse-client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
    clickhouse-client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
    clickhouse-client --port 9002 --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: &
+    clickhouse-client --port 9002 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > right-async-metric-log.tsv ||: &

    wait

@ -347,9 +349,11 @@ create table query_metric_stats engine File(TSVWithNamesAndTypes,
 create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
    as select
        -- FIXME Comparison mode doesn't make sense for queries that complete
-        -- immediately, so for now we pretend they don't exist. We don't want to
-        -- remove them altogether because we want to be able to detect regressions,
-        -- but the right way to do this is not yet clear.
+        -- immediately (on the same order of time as noise). We compute average
+        -- run time between old and new version, and if it is below a threshold,
+        -- we just skip the query. If there is a significant regression, the
+        -- average will be above threshold, we'll process it normally and will
+        -- detect the regression.
        (left + right) / 2 < 0.02 as short,

        not short and abs(diff) > report_threshold        and abs(diff) > stat_threshold as changed_fail,
@ -409,11 +413,11 @@ create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json')
    ;

 create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
-    select left, right, diff, stat_threshold, changed_fail, test, query_display_name
+    select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name
    from queries where changed_show order by abs(diff) desc;

 create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
-    select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
+    select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name
    from queries where unstable_show order by stat_threshold desc;

 create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
@ -421,9 +425,39 @@ create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
    select test, query_index from queries where unstable_show or changed_show
    ;

-create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
-    select test, sum(unstable_fail) u, sum(changed_fail) c, u + c s from queries
-    group by test having s > 0 order by s desc;
+create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as
+    select test, queries, average_time_change from (
+        select test, count(*) queries,
+            sum(left) as left, sum(right) as right,
+            (right - left) / right average_time_change
+        from queries
+        group by test
+        order by abs(average_time_change) desc
+    )
+    ;
+
+create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as
+    select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
+    from queries
+    group by test
+    order by total_unstable + total_changed desc
+    ;
+
+create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as
+    select test,
+        queries,
+        coalesce(total_unstable, 0) total_unstable,
+        coalesce(total_changed, 0) total_changed,
+        total_unstable + total_changed total_bad,
+        coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str
+    from test_time_changes_tsv
+    full join unstable_tests_tsv
+    using test
+    where (abs(average_time_change) > 0.05 and queries > 5)
+        or (total_bad > 0)
+    order by total_bad desc, average_time_change desc
+    settings join_use_nulls = 1
+    ;

 create table query_time engine Memory as select *
    from file('analyze/client-times.tsv', TSV,
@ -464,8 +498,8 @@ create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
    select changed_fail, unstable_fail,
        left, right, diff,
        floor(left > right ? left / right : right / left, 3),
-        stat_threshold, test, query_display_name
-    from queries order by test, query_display_name;
+        stat_threshold, test, query_index, query_display_name
+    from queries order by test, query_index;

 -- new report for all queries with all metrics (no page yet)
 create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
@ -582,7 +616,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes,
            union all select * from unstable_run_traces
            union all select * from unstable_run_metrics_2) mm
        group by test, query_index, metric
-        having d > 0.5
+        having d > 0.5 and q[3] > 5
    ) metrics
    left join query_display_names using (test, query_index)
    order by test, query_index, d desc
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@ -17,7 +17,7 @@ function find_reference_sha
    # If not master, try to fetch pull/.../{head,merge}
    if [ "$PR_TO_TEST" != "0" ]
    then
-        git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pr/*"
+        git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
    fi

    # Go back from the revision to be tested, trying to find the closest published
@ -28,9 +28,9 @@ function find_reference_sha
    # and SHA_TO_TEST, but a revision that is merged with recent master, given
    # by pull/.../merge ref.
    # Master is the first parent of the pull/.../merge.
-    if git -C ch rev-parse pr/merge
+    if git -C ch rev-parse "pull/$PR_TO_TEST/merge"
    then
-        start_ref=pr/merge~
+        start_ref="pull/$PR_TO_TEST/merge~"
    fi

    while :
@ -73,11 +73,11 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi

 (
    git -C ch log -1 --decorate "$SHA_TO_TEST" ||:
-    if git -C ch rev-parse pr/merge &> /dev/null
+    if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null
    then
        echo
        echo Real tested commit is:
-        git -C ch log -1 --decorate pr/merge
+        git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge"
    fi
 ) | tee right-commit.txt

@ -87,7 +87,7 @@ then
    # tests for use by compare.sh. Compare to merge base, because master might be
    # far in the future and have unrelated test changes.
    base=$(git -C ch merge-base "$SHA_TO_TEST" master)
-    git -C ch diff --name-only "$SHA_TO_TEST" "$base" | tee changed-tests.txt
+    git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt
    if grep -vq '^tests/performance' changed-tests.txt
    then
        # Have some other changes besides the tests, so truncate the test list,
@ -131,5 +131,8 @@ done

 dmesg -T > dmesg.log

-7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
+7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
+    {right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
+    report analyze benchmark
+
 cp compare.log /output
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@ -207,7 +207,8 @@ if args.report == 'main':
            'p&nbsp;<&nbsp;0.001 threshold',                   # 3
            # Failed                                           # 4
            'Test',                                            # 5
-            'Query',                                           # 6
+            '#',                                               # 6
+            'Query',                                           # 7
            ]

        print(tableHeader(columns))
@ -248,7 +249,8 @@ if args.report == 'main':
            'p&nbsp;<&nbsp;0.001 threshold', #3
            # Failed #4
            'Test', #5
-            'Query' #6
+            '#',    #6
+            'Query' #7
        ]

        print(tableStart('Unstable queries'))
@ -272,9 +274,9 @@ if args.report == 'main':
    skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
    printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)

-    printSimpleTable('Tests with most unstable queries',
-        ['Test', 'Unstable', 'Changed perf', 'Total not OK'],
-        tsvRows('report/bad-tests.tsv'))
+    printSimpleTable('Test performance changes',
+        ['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
+        tsvRows('report/test-perf-changes.tsv'))

    def print_test_times():
        global slow_average_tests
@ -357,7 +359,7 @@ if args.report == 'main':
    error_tests += slow_average_tests
    if error_tests:
        status = 'failure'
-        message_array.append(str(error_tests) + ' errors')
+        message_array.insert(0, str(error_tests) + ' errors')

    if message_array:
        message = ', '.join(message_array)
@ -391,7 +393,8 @@ elif args.report == 'all-queries':
            'Times speedup / slowdown',                 #5
            'p&nbsp;<&nbsp;0.001 threshold',          #6
            'Test',                                   #7
-            'Query',                                  #8
+            '#',                                      #8
+            'Query',                                  #9
            ]

        print(tableStart('All query times'))
--- a/docker/test/pvs/Dockerfile
+++ b/docker/test/pvs/Dockerfile
@ -20,9 +20,9 @@ RUN apt-get --allow-unauthenticated update -y \
 #        apt-get --allow-unauthenticated install --yes --no-install-recommends \
 #            pvs-studio

-ENV PKG_VERSION="pvs-studio-7.07.38234.46-amd64.deb"
+ENV PKG_VERSION="pvs-studio-7.07.38234.48-amd64.deb"

-RUN wget "http://files.viva64.com/$PKG_VERSION"
+RUN wget "https://files.viva64.com/$PKG_VERSION"
 RUN sudo dpkg -i "$PKG_VERSION"

 CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic  \
--- a/docker/test/stateful/Dockerfile
+++ b/docker/test/stateful/Dockerfile
@ -24,6 +24,8 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
    ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
    ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \
--- a/docker/test/stateful_with_coverage/run.sh
+++ b/docker/test/stateful_with_coverage/run.sh
@ -59,7 +59,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
    ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
    ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -62,7 +62,9 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
    ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
--- a/docker/test/stateless_with_coverage/run.sh
+++ b/docker/test/stateless_with_coverage/run.sh
@ -50,7 +50,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
    ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
+    ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
--- a/docker/test/stress/Dockerfile
+++ b/docker/test/stress/Dockerfile
@ -31,6 +31,7 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
    dpkg -i package_folder/clickhouse-server_*.deb;  \
    dpkg -i package_folder/clickhouse-client_*.deb; \
    dpkg -i package_folder/clickhouse-test_*.deb; \
+    ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \
    echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \
--- a/docs/_description_templates/template-function.md
+++ b/docs/_description_templates/template-function.md
@ -1,4 +1,4 @@
-## function-name {#function-name-in-lower-case}
+## functionName {#functionname-in-lower-case}

 Short description.

--- a/docs/_description_templates/template-setting.md
+++ b/docs/_description_templates/template-setting.md
@ -1,4 +1,4 @@
-## setting-name {#setting-name-in-lower-case}
+## setting_name {#setting_name}

 Description.

--- a/docs/en/development/build-cross-arm.md
+++ b/docs/en/development/build-cross-arm.md
@ -7,7 +7,7 @@ toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)

 This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.

-The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first.
+The cross-build for AARCH64 is based on the [Build instructions](../development/build.md); follow them first.

 # Install Clang-8 {#install-clang-8}

--- a/docs/en/development/build-cross-osx.md
+++ b/docs/en/development/build-cross-osx.md
@ -5,9 +5,9 @@ toc_title: How to Build ClickHouse on Linux for Mac OS X

 # How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}

-This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build-osx.md).
+This is for the case when you have a Linux machine and want to use it to build the `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, follow [the other instructions](../development/build-osx.md) instead.

-The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first.
+The cross-build for Mac OS X is based on the [Build instructions](../development/build.md); follow them first.

 # Install Clang-8 {#install-clang-8}

--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@ -28,10 +28,9 @@ There are several ways to do this.
 ### Install from Repository {#install-from-repository}

 On Ubuntu 19.10 or newer:
-```
-$ sudo apt-get update
-$ sudo apt-get install gcc-9 g++-9
-```
+
+    $ sudo apt-get update
+    $ sudo apt-get install gcc-9 g++-9

 ### Install from a PPA Package {#install-from-a-ppa-package}

--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@ -3,11 +3,11 @@ toc_priority: 61
 toc_title: For Beginners
 ---

-# The Beginner ClickHouse Developer Instruction
+# The Beginner ClickHouse Developer Instruction {#the-beginner-clickhouse-developer-instruction}

 Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.

-If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
+If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.

 ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.

@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl

 To install GCC on Ubuntu run: `sudo apt install gcc g++`

-Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9.
+Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.

 Mac OS X build is supported only for Clang. Just run `brew install llvm`

--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@ -200,7 +200,7 @@ Debug version of `jemalloc` is used for debug build.
 ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
 All the fuzz testing should be performed with sanitizers (Address and Undefined).

-LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have "\_fuzzer" name postfixes.
+LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes.
 Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
 We encourage you to write fuzz tests for every functionality that handles user input.

@ -211,7 +211,6 @@ Google OSS-Fuzz can be found at `docker/fuzz`.
 We also use simple fuzz test to generate random SQL queries and to check that the server doesn’t die executing them.
 You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).

-
 ## Security Audit {#security-audit}

 People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint.
--- a/docs/en/engines/database-engines/index.md
+++ b/docs/en/engines/database-engines/index.md
@ -12,8 +12,8 @@ By default, ClickHouse uses its native database engine, which provides configura

 You can also use the following database engines:

-   [MySQL](mysql.md)
+-   [MySQL](../../engines/database-engines/mysql.md)

-   [Lazy](lazy.md)
+-   [Lazy](../../engines/database-engines/lazy.md)

 [Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->
--- a/docs/en/engines/index.md
+++ b/docs/en/engines/index.md
@ -1,8 +1,8 @@
 ---
 toc_folder_title: Engines
+toc_hidden: true
 toc_priority: 25
 toc_title: hidden
-toc_hidden: true
 ---

 {## [Original article](https://clickhouse.tech/docs/en/engines/) ##}
--- a/docs/en/engines/table-engines/index.md
+++ b/docs/en/engines/table-engines/index.md
@ -19,27 +19,27 @@ The table engine (type of table) determines:

 ### MergeTree {#mergetree}

-The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated*](mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.
+The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.

 Engines in the family:

-   [MergeTree](mergetree-family/mergetree.md#mergetree)
-   [ReplacingMergeTree](mergetree-family/replacingmergetree.md#replacingmergetree)
-   [SummingMergeTree](mergetree-family/summingmergetree.md#summingmergetree)
-   [AggregatingMergeTree](mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
-   [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
-   [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
-   [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree)
+-   [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree)
+-   [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree)
+-   [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree)
+-   [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
+-   [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
+-   [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
+-   [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree)

 ### Log {#log}

-Lightweight [engines](log-family/index.md) with minimum functionality. They’re the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
+Lightweight [engines](../../engines/table-engines/log-family/index.md) with minimum functionality. They’re the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.

 Engines in the family:

-   [TinyLog](log-family/tinylog.md#tinylog)
-   [StripeLog](log-family/stripelog.md#stripelog)
-   [Log](log-family/log.md#log)
+-   [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog)
+-   [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog)
+-   [Log](../../engines/table-engines/log-family/log.md#log)

 ### Integration Engines {#integration-engines}

@ -47,28 +47,28 @@ Engines for communicating with other data storage and processing systems.

 Engines in the family:

-   [Kafka](integrations/kafka.md#kafka)
-   [MySQL](integrations/mysql.md#mysql)
-   [ODBC](integrations/odbc.md#table-engine-odbc)
-   [JDBC](integrations/jdbc.md#table-engine-jdbc)
-   [HDFS](integrations/hdfs.md#hdfs)
+-   [Kafka](../../engines/table-engines/integrations/kafka.md#kafka)
+-   [MySQL](../../engines/table-engines/integrations/mysql.md#mysql)
+-   [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
+-   [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
+-   [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)

 ### Special Engines {#special-engines}

 Engines in the family:

-   [Distributed](special/distributed.md#distributed)
-   [MaterializedView](special/materializedview.md#materializedview)
-   [Dictionary](special/dictionary.md#dictionary)
-   [Merge](special/merge.md#merge
-   [File](special/file.md#file)
-   [Null](special/null.md#null)
-   [Set](special/set.md#set)
-   [Join](special/join.md#join)
-   [URL](special/url.md#table_engines-url)
-   [View](special/view.md#table_engines-view)
-   [Memory](special/memory.md#memory)
-   [Buffer](special/buffer.md#buffer)
+-   [Distributed](../../engines/table-engines/special/distributed.md#distributed)
+-   [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview)
+-   [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary)
+-   [Merge](../../engines/table-engines/special/merge.md#merge)
+-   [File](../../engines/table-engines/special/file.md#file)
+-   [Null](../../engines/table-engines/special/null.md#null)
+-   [Set](../../engines/table-engines/special/set.md#set)
+-   [Join](../../engines/table-engines/special/join.md#join)
+-   [URL](../../engines/table-engines/special/url.md#table_engines-url)
+-   [View](../../engines/table-engines/special/view.md#table_engines-view)
+-   [Memory](../../engines/table-engines/special/memory.md#memory)
+-   [Buffer](../../engines/table-engines/special/buffer.md#buffer)

 ## Virtual Columns {#table_engines-virtual_columns}

--- a/docs/en/engines/table-engines/integrations/hdfs.md
+++ b/docs/en/engines/table-engines/integrations/hdfs.md
@ -6,7 +6,7 @@ toc_title: HDFS
 # HDFS {#table_engines-hdfs}

 This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar
-to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
+to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.

 ## Usage {#usage}

@ -116,6 +116,6 @@ CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9

 **See Also**

-   [Virtual columns](../index.md#table_engines-virtual_columns)
+-   [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide-->
--- a/docs/en/engines/table-engines/integrations/kafka.md
+++ b/docs/en/engines/table-engines/integrations/kafka.md
@ -173,7 +173,7 @@ For a list of possible configuration options, see the [librdkafka configuration

 **See Also**

-   [Virtual columns](../index.md#table_engines-virtual_columns)
-   [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
+-   [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
+-   [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->
--- a/docs/en/engines/table-engines/log-family/log-family.md
+++ b/docs/en/engines/table-engines/log-family/log-family.md
@ -9,9 +9,9 @@ These engines were developed for scenarios when you need to quickly write many s

 Engines of the family:

-   [StripeLog](stripelog.md)
-   [Log](log.md)
-   [TinyLog](tinylog.md)
+-   [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
+-   [Log](../../../engines/table-engines/log-family/log.md)
+-   [TinyLog](../../../engines/table-engines/log-family/tinylog.md)

 ## Common Properties {#common-properties}

--- a/docs/en/engines/table-engines/log-family/log.md
+++ b/docs/en/engines/table-engines/log-family/log.md
@ -5,9 +5,9 @@ toc_title: Log

 # Log {#log}

-Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
+Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.

-Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
+Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
 For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
 The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.

--- a/docs/en/engines/table-engines/log-family/stripelog.md
+++ b/docs/en/engines/table-engines/log-family/stripelog.md
@ -5,7 +5,7 @@ toc_title: StripeLog

 # Stripelog {#stripelog}

-This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
+This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.

 Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).

--- a/docs/en/engines/table-engines/log-family/tinylog.md
+++ b/docs/en/engines/table-engines/log-family/tinylog.md
@ -5,10 +5,10 @@ toc_title: TinyLog

 # TinyLog {#tinylog}

-The engine belongs to the log engine family. See [Log Engine Family](log-family.md) for common properties of log engines and their differences.
+The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences.

 This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.

-Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it’s simpler than the [Log](log.md) engine (fewer files need to be opened).
+Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it’s simpler than the [Log](../../../engines/table-engines/log-family/log.md) engine (fewer files need to be opened).

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide-->
--- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md
@ -5,7 +5,7 @@ toc_title: AggregatingMergeTree

 # Aggregatingmergetree {#aggregatingmergetree}

-The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
+The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with a single row (within one data part) that stores a combination of states of aggregate functions.

 You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views.

@ -36,7 +36,7 @@ For a description of request parameters, see [request description](../../../sql-

 **Query clauses**

-When creating a `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
+When creating an `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.

 <details markdown="1">

--- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md
@ -5,7 +5,7 @@ toc_title: CollapsingMergeTree

 # CollapsingMergeTree {#table_engine-collapsingmergetree}

-The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
+The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.

 `CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document.

@ -36,7 +36,7 @@ For a description of query parameters, see [query description](../../../sql-refe

 **Query clauses**

-When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
+When creating a `CollapsingMergeTree` table, the same [query clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.

 <details markdown="1">

--- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md
+++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md
@ -5,11 +5,11 @@ toc_title: Custom Partitioning Key

 # Custom Partitioning Key {#custom-partitioning-key}

-Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
+Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.

 A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.

-The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
+The partition is specified in the `PARTITION BY expr` clause when [creating a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:

 ``` sql
 CREATE TABLE visits
@ -23,7 +23,7 @@ PARTITION BY toYYYYMM(VisitDate)
 ORDER BY Hour;
 ```

-The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example:
+The partition key can also be a tuple of expressions (similar to the [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries)). For example:

 ``` sql
 ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign)
@ -38,7 +38,7 @@ When inserting new data to a table, this data is stored as a separate part (chun
 !!! info "Info"
    A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn’t make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors.

-Use the [system.parts](../../../operations/system-tables.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. Let’s perform the `SELECT` query for the `system.parts` table:
+Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. Let’s perform the `SELECT` query for the `system.parts` table:

 ``` sql
 SELECT
--- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
@ -9,7 +9,7 @@ This engine is designed for thinning and aggregating/averaging (rollup) [Graphit

 You can use any ClickHouse table engine to store the Graphite data if you don’t need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite.

-The engine inherits properties from [MergeTree](mergetree.md).
+The engine inherits properties from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md).

 ## Creating a Table {#creating-table}

@ -50,7 +50,7 @@ The names of these columns should be set in the rollup configuration.

 **Query clauses**

-When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
+When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.

 <details markdown="1">

--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -15,20 +15,20 @@ Main features:

    This allows you to create a small sparse index that helps find data faster.

-   Partitions can be used if the [partitioning key](custom-partitioning-key.md) is specified.
+-   Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.

    ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance.

 -   Data replication support.

-    The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](replication.md).
+    The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](../../../engines/table-engines/mergetree-family/replication.md).

 -   Data sampling support.

    If necessary, you can set the data sampling method in the table.

 !!! info "Info"
-    The [Merge](../special/merge.md#merge) engine does not belong to the `*MergeTree` family.
+    The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family.

 ## Creating a Table {#table_engine-mergetree-creating-a-table}

@ -41,8 +41,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
    INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
    INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
 ) ENGINE = MergeTree()
+ORDER BY expr
 [PARTITION BY expr]
-[ORDER BY expr]
 [PRIMARY KEY expr]
 [SAMPLE BY expr]
 [TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
@ -51,30 +51,31 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

 For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md).

-!!! note "Note"
-    `INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes).
-
 ### Query Clauses {#mergetree-query-clauses}

 -   `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.

-   `PARTITION BY` — The [partitioning key](custom-partitioning-key.md).
+-   `ORDER BY` — The sorting key.
+
+    A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
+
+    ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause.
+
+    Use the `ORDER BY tuple()` syntax, if you don’t need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
+
+-   `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional.

    For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.

-   `ORDER BY` — The sorting key.
-
-    A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
-
-   `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key).
+-   `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.

    By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause.

-   `SAMPLE BY` — An expression for sampling.
+-   `SAMPLE BY` — An expression for sampling. Optional.

    If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.

-   `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes).
+-   `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.

    Expression must have one `Date` or `DateTime` column as a result. Example:
    `TTL date + INTERVAL 1 DAY`
@ -83,7 +84,7 @@ For a description of parameters, see the [CREATE query description](../../../sql

    For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)

-   `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`:
+-   `SETTINGS` — Additional parameters that control the behavior of the `MergeTree` (optional):

    -   `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage).
    -   `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
@ -192,18 +193,22 @@ The number of columns in the primary key is not explicitly limited. Depending on

    ClickHouse sorts data by primary key, so the higher the consistency, the better the compression.

-   Provide additional logic when merging data parts in the [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](summingmergetree.md) engines.
+-   Provide additional logic when merging data parts in the [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) engines.

    In this case it makes sense to specify the *sorting key* that is different from the primary key.

 A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.

+You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of insertion. If you want to preserve the data order when inserting data with `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
+
+To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
+
 ### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}

 It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.

-This feature is helpful when using the [SummingMergeTree](summingmergetree.md) and
-[AggregatingMergeTree](aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
+This feature is helpful when using the [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) and
+[AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.

 In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple.

@ -249,7 +254,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par

 ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence.

-### Data Skipping Indexes (experimental) {#table_engine-mergetree-data_skipping-indexes}
+### Data Skipping Indexes {#table_engine-mergetree-data_skipping-indexes}

 The index declaration is in the columns section of the `CREATE` query.

@ -332,8 +337,8 @@ The `set` index can be used with all functions. Function subsets for other index
 |------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
 | [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals)                 | ✔           | ✔      | ✔           | ✔           | ✔             |
 | [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals)         | ✔           | ✔      | ✔           | ✔           | ✔             |
-| [like](../../../sql-reference/functions/string-search-functions.md#function-like)                          | ✔           | ✔      | ✔           | ✗           | ✗             |
-| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike)                    | ✔           | ✔      | ✔           | ✗           | ✗             |
+| [like](../../../sql-reference/functions/string-search-functions.md#function-like)                          | ✔           | ✔      | ✔           | ✔           | ✔             |
+| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike)                    | ✔           | ✔      | ✗           | ✗           | ✗             |
 | [startsWith](../../../sql-reference/functions/string-functions.md#startswith)                              | ✔           | ✔      | ✔           | ✔           | ✗             |
 | [endsWith](../../../sql-reference/functions/string-functions.md#endswith)                                  | ✗           | ✗      | ✔           | ✔           | ✗             |
 | [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany)      | ✗           | ✗      | ✔           | ✗           | ✗             |
@ -349,7 +354,8 @@ The `set` index can be used with all functions. Function subsets for other index

 Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization.

-Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example:
+!!! note "Note"
+    Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example:

 -   Can be optimized:
    -   `s LIKE '%test%'`
@ -478,7 +484,7 @@ When ClickHouse see that data is expired, it performs an off-schedule merge. To

 If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`.

-## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} 
+## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}

 ### Introduction {#introduction}

@ -493,7 +499,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
 -   Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
 -   Storage policy — Set of volumes and the rules for moving data between them.

-The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
+The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.

 ### Configuration {#table_engine-mergetree-multiple-volumes_configure}

@ -623,7 +629,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'

 The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.

-The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
+The number of threads performing background moves of data parts can be changed by [background\_move\_pool\_size](../../../operations/settings/settings.md#background_move_pool_size) setting.

 ### Details {#details}

@ -642,7 +648,7 @@ In all these cases except for mutations and partition freezing, a part is stored
 Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.

 In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
-Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
+Data is never transferred out of the last volume or into the first one. One may use system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.

 User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.

@ -652,4 +658,3 @@ After the completion of background merges and mutations, old parts are removed o
 During this time, they are not moved to other volumes or disks. Therefore, until the parts are finally removed, they are still taken into account for evaluation of the occupied disk space.

 [Original article](https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/) <!--hide-->
-
--- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
@ -5,7 +5,7 @@ toc_title: ReplacingMergeTree

 # ReplacingMergeTree {#replacingmergetree}

-The engine differs from [MergeTree](mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](mergetree.md) value).
+The engine differs from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md) value).

 Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you can’t plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, don’t count on using it, because the `OPTIMIZE` query will read and write a large amount of data.

@ -40,7 +40,7 @@ For a description of request parameters, see [request description](../../../sql-

 **Query clauses**

-When creating a `ReplacingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
+When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.

 <details markdown="1">

--- a/docs/en/engines/table-engines/mergetree-family/replication.md
+++ b/docs/en/engines/table-engines/mergetree-family/replication.md
@ -63,7 +63,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug

 For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers).

-Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
+Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.

 By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.

@ -217,6 +217,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to

 **See also**

-   [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
+-   [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->
--- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
@ -5,7 +5,7 @@ toc_title: SummingMergeTree

 # SummingMergeTree {#summingmergetree}

-The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
+The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.

 We recommend to use the engine together with `MergeTree`. Store complete data in `MergeTree` table, and use `SummingMergeTree` for aggregated data storing, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key.

@ -35,7 +35,7 @@ For a description of request parameters, see [request description](../../../sql-

 **Query clauses**

-When creating a `SummingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
+When creating a `SummingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.

 <details markdown="1">

@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key

 When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.

-ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
+ClickHouse can merge the data parts so that different resulting parts of data can contain rows with the same primary key, i.e. the summation will be incomplete. Therefore, in a `SELECT` query, the aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and a `GROUP BY` clause should be used, as described in the example above.

 ### Common Rules for Summation {#common-rules-for-summation}

@ -110,7 +110,7 @@ The values are not summarized for columns in the primary key.

 ### The Summation in the Aggregatefunction Columns {#the-summation-in-the-aggregatefunction-columns}

-For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](aggregatingmergetree.md) engine aggregating according to the function.
+For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine aggregating according to the function.

 ### Nested Structures {#nested-structures}

@ -132,7 +132,7 @@ Examples:
 [(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
 ```

-When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference.md) function for aggregation of `Map`.
+When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.

 For nested data structure, you do not need to specify its columns in the tuple of columns for summation.

--- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
@ -12,7 +12,7 @@ This engine:

 See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details.

-The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
+The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.

 ## Creating a Table {#creating-a-table}

@ -47,7 +47,7 @@ VersionedCollapsingMergeTree(sign, version)

 **Query Clauses**

-When creating a `VersionedCollapsingMergeTree` table, the same [clauses](mergetree.md) are required as when creating a `MergeTree` table.
+When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.

 <details markdown="1">

--- a/docs/en/engines/table-engines/special/buffer.md
+++ b/docs/en/engines/table-engines/special/buffer.md
@ -3,7 +3,7 @@ toc_priority: 45
 toc_title: Buffer
 ---

-# Buffer {#buffer}
+# Buffer Table Engine {#buffer}

 Buffers the data to write in RAM, periodically flushing it to another table. During the read operation, data is read from the buffer and the other table simultaneously.

@ -34,9 +34,9 @@ Example:
 CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)
 ```

-Creating a ‘merge.hits\_buffer’ table with the same structure as ‘merge.hits’ and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the ‘merge.hits’ table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
+Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the `merge.hits` table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.

-When the server is stopped, with DROP TABLE or DETACH TABLE, buffer data is also flushed to the destination table.
+When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffer data is also flushed to the destination table.

 You can set empty strings in single quotation marks for the database and table name. This indicates the absence of a destination table. In this case, when the data flush conditions are reached, the buffer is simply cleared. This may be useful for keeping a window of data in memory.

@ -52,11 +52,11 @@ If you need to run ALTER for a subordinate table and the Buffer table, we recomm

 If the server is restarted abnormally, the data in the buffer is lost.

-FINAL and SAMPLE do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.
+`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.

 When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table.

-Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set ‘num\_layers’ to 1.
+Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1.

 If the destination table is replicated, some expected characteristics of replicated tables are lost when writing to a Buffer table. The random changes to the order of rows and sizes of data parts cause data deduplication to quit working, which means it is not possible to have a reliable ‘exactly once’ write to replicated tables.

--- a/docs/en/engines/table-engines/special/dictionary.md
+++ b/docs/en/engines/table-engines/special/dictionary.md
@ -3,15 +3,17 @@ toc_priority: 35
 toc_title: Dictionary
 ---

-# Dictionary {#dictionary}
+# Dictionary Table Engine {#dictionary}

 The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.

+## Example {#example}
+
 As an example, consider a dictionary of `products` with the following configuration:

 ``` xml
 <dictionaries>
-<dictionary>
+    <dictionary>
        <name>products</name>
        <source>
            <odbc>
@ -36,7 +38,7 @@ As an example, consider a dictionary of `products` with the following configurat
                <null_value></null_value>
            </attribute>
        </structure>
-</dictionary>
+    </dictionary>
 </dictionaries>
 ```

--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@ -3,9 +3,9 @@ toc_priority: 33
 toc_title: Distributed
 ---

-# Distributed {#distributed}
+# Distributed Table Engine {#distributed}

-**Tables with Distributed engine do not store any data by themself**, but allow distributed query processing on multiple servers.
+Tables with the Distributed engine do not store any data on their own, but allow distributed query processing on multiple servers.
 Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.

 The Distributed engine accepts parameters:
@ -23,7 +23,7 @@ The Distributed engine accepts parameters:
    See also:

    -   `insert_distributed_sync` setting
-    -   [MergeTree](../mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
+    -   [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples

 Example:

@ -31,7 +31,7 @@ Example:
 Distributed(logs, default, hits[, sharding_key[, policy_name]])
 ```

-Data will be read from all servers in the ‘logs’ cluster, from the default.hits table located on every server in the cluster.
+Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.
 Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
 For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.

@ -75,7 +75,7 @@ Clusters are set like this:
 </remote_servers>
 ```

-Here a cluster is defined with the name ‘logs’ that consists of two shards, each of which contains two replicas.
+Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas.
 Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards).
 Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).

@ -83,7 +83,7 @@ Cluster names must not contain dots.

 The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
 - `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn’t start. If you change the DNS record, restart the server.
- `port` – The TCP port for messenger activity (‘tcp\_port’ in the config, usually set to 9000). Do not confuse it with http\_port.
+- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with `http_port`.
 - `user` – Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
 - `password` – The password for connecting to a remote server (not masked). Default value: empty string.
 - `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and have correct certificates.
@ -97,11 +97,11 @@ You can specify just one of the shards (in this case, query processing should be

 You can specify as many clusters as you wish in the configuration.

-To view your clusters, use the ‘system.clusters’ table.
+To view your clusters, use the `system.clusters` table.

 The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster’s servers).

-The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don’t need to create a Distributed table – use the ‘remote’ table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
+The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don’t need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).

 There are two methods for writing data to a cluster:

@ -111,15 +111,15 @@ Second, you can perform INSERT in a Distributed table. In this case, the table w

 Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.

-Each shard can have the ‘internal\_replication’ parameter defined in the config file.
+Each shard can have the `internal_replication` parameter defined in the config file.

-If this parameter is set to ‘true’, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
+If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.

-If it is set to ‘false’ (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
+If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.

-To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from ‘prev\_weight’ to ‘prev\_weights + weight’, where ‘prev\_weights’ is the total weight of the shards with the smallest number, and ‘weight’ is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
+To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weights` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).

-The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression ‘rand()’ for random distribution of data, or ‘UserID’ for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
+The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).

 A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.

@ -130,11 +130,11 @@ You should be concerned about the sharding scheme in the following cases:
 -   Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
 -   A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.

-Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
+Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.

-If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the ‘broken’ subdirectory and no longer used.
+If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.

-When the max\_parallel\_replicas option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
+When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).

 ## Virtual Columns {#virtual-columns}

@ -145,7 +145,7 @@ When the max\_parallel\_replicas option is enabled, query processing is parallel

 **See Also**

-   [Virtual columns](index.md#table_engines-virtual_columns)
-   [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
+-   [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
+-   [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->
--- a/docs/en/engines/table-engines/special/external-data.md
+++ b/docs/en/engines/table-engines/special/external-data.md
@ -1,11 +1,11 @@
 ---
-toc_priority: 34
-toc_title: External data
+toc_priority: 45
+toc_title: External Data
 ---

 # External Data for Query Processing {#external-data-for-query-processing}

-ClickHouse allows sending a server the data that is needed for processing a query, together with a SELECT query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in IN operators).
+ClickHouse allows sending a server the data that is needed for processing a query, together with a `SELECT` query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in `IN` operators).

 For example, if you have a text file with important user identifiers, you can upload it to the server along with a query that uses filtration by this list.

@ -46,7 +46,7 @@ $ cat /etc/passwd | sed 's/:/\t/g' | clickhouse-client --query="SELECT shell, co
 /bin/sync       1
 ```

-When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The ‘query\_string’ is passed the parameters ‘name\_format’, ‘name\_types’, and ‘name\_structure’, where ‘name’ is the name of the table that these parameters correspond to. The meaning of the parameters is the same as when using the command-line client.
+When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The `query_string` is passed the parameters `name_format`, `name_types`, and `name_structure`, where `name` is the name of the table that these parameters correspond to. The meaning of the parameters is the same as when using the command-line client.

 Example:

--- a/docs/en/engines/table-engines/special/file.md
+++ b/docs/en/engines/table-engines/special/file.md
@ -3,12 +3,11 @@ toc_priority: 37
 toc_title: File
 ---

-# File {#table_engines-file}
+# File Table Engine {#table_engines-file}

-The File table engine keeps the data in a file in one of the supported [file
-formats](../../../interfaces/formats.md#formats) (TabSeparated, Native, etc.).
+The File table engine keeps the data in a file in one of the supported [file formats](../../../interfaces/formats.md#formats) (`TabSeparated`, `Native`, etc.).

-Usage examples:
+Usage scenarios:

 -   Data export from ClickHouse to file.
 -   Convert data from one format to another.
@ -34,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A
 !!! warning "Warning"
    Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.

-**Example:**
+## Example {#example}

 **1.** Set up the `file_engine_table` table:

--- a/docs/en/engines/table-engines/special/generate.md
+++ b/docs/en/engines/table-engines/special/generate.md
@ -3,7 +3,7 @@ toc_priority: 46
 toc_title: GenerateRandom
 ---

-# Generaterandom {#table_engines-generate}
+# GenerateRandom Table Engine {#table_engines-generate}

 The GenerateRandom table engine produces random data for given table schema.

@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries.

 It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.

-**Example:**
+## Example {#example}

 **1.** Set up the `generate_engine_table` table:

--- a/docs/en/engines/table-engines/special/join.md
+++ b/docs/en/engines/table-engines/special/join.md
@ -3,9 +3,12 @@ toc_priority: 40
 toc_title: Join
 ---

-# Join {#join}
+# Join Table Engine {#join}

-Prepared data structure for using in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations.
+Optional prepared data structure for usage in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations.
+
+!!! note "Note"
+    This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself.

 ## Creating a Table {#creating-a-table}

--- a/docs/en/engines/table-engines/special/materializedview.md
+++ b/docs/en/engines/table-engines/special/materializedview.md
@ -3,8 +3,8 @@ toc_priority: 43
 toc_title: MaterializedView
 ---

-# Materializedview {#materializedview}
+# MaterializedView Table Engine {#materializedview}

-Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses this engine.
+Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine.

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) <!--hide-->
--- a/docs/en/engines/table-engines/special/memory.md
+++ b/docs/en/engines/table-engines/special/memory.md
@ -3,15 +3,16 @@ toc_priority: 44
 toc_title: Memory
 ---

-# Memory {#memory}
+# Memory Table Engine {#memory}

 The Memory engine stores data in RAM, in uncompressed form. Data is stored in exactly the same form as it is received when read. In other words, reading from this table is completely free.
 Concurrent data access is synchronized. Locks are short: read and write operations don’t block each other.
 Indexes are not supported. Reading is parallelized.
+
 Maximal productivity (over 10 GB/sec) is reached on simple queries, because there is no reading from the disk, decompressing, or deserializing data. (We should note that in many cases, the productivity of the MergeTree engine is almost as high.)
 When restarting a server, data disappears from the table and the table becomes empty.
 Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000).

-The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing GLOBAL IN (see the section “IN operators”).
+The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”).

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/memory/) <!--hide-->
--- a/docs/en/engines/table-engines/special/merge.md
+++ b/docs/en/engines/table-engines/special/merge.md
@ -3,13 +3,17 @@ toc_priority: 36
 toc_title: Merge
 ---

-# Merge {#merge}
+# Merge Table Engine {#merge}

 The `Merge` engine (not to be confused with `MergeTree`) does not store data itself, but allows reading from any number of other tables simultaneously.
+
 Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist.
+
 The `Merge` engine accepts parameters: the database name and a regular expression for tables.

-Example:
+## Examples {#examples}
+
+Example 1:

 ``` sql
 Merge(hits, '^WatchLog')
@ -63,6 +67,6 @@ FROM WatchLog

 **See Also**

-   [Virtual columns](index.md#table_engines-virtual_columns)
+-   [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/merge/) <!--hide-->
--- a/docs/en/engines/table-engines/special/null.md
+++ b/docs/en/engines/table-engines/special/null.md
@ -3,10 +3,11 @@ toc_priority: 38
 toc_title: 'Null'
 ---

-# Null {#null}
+# Null Table Engine {#null}

-When writing to a Null table, data is ignored. When reading from a Null table, the response is empty.
+When writing to a `Null` table, data is ignored. When reading from a `Null` table, the response is empty.

-However, you can create a materialized view on a Null table. So the data written to the table will end up in the view.
+!!! info "Hint"
+    However, you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded.

 [Original article](https://clickhouse.tech/docs/en/operations/table_engines/null/) <!--hide-->
--- a/docs/en/engines/table-engines/special/set.md
+++ b/docs/en/engines/table-engines/special/set.md
@ -3,14 +3,14 @@ toc_priority: 39
 toc_title: Set
 ---

-# Set {#set}
+# Set Table Engine {#set}

-A data set that is always in RAM. It is intended for use on the right side of the IN operator (see the section “IN operators”).
+A data set that is always in RAM. It is intended for use on the right side of the `IN` operator (see the section “IN operators”).

-You can use INSERT to insert data in the table. New elements will be added to the data set, while duplicates will be ignored.
-But you can’t perform SELECT from the table. The only way to retrieve data is by using it in the right half of the IN operator.
+You can use `INSERT` to insert data in the table. New elements will be added to the data set, while duplicates will be ignored.
+But you can’t perform `SELECT` from the table. The only way to retrieve data is by using it in the right half of the `IN` operator.

-Data is always located in RAM. For INSERT, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place.
+Data is always located in RAM. For `INSERT`, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place.

 For a rough server restart, the block of data on the disk might be lost or damaged. In the latter case, you may need to manually delete the file with damaged data.

--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@ -3,12 +3,13 @@ toc_priority: 41
 toc_title: URL
 ---

-# URL(URL, Format) {#table_engines-url}
+# URL Table Engine {#table_engines-url}

-Manages data on a remote HTTP/HTTPS server. This engine is similar
-to the [File](file.md) engine.
+Reads data from and writes data to a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.

-## Using the Engine in the ClickHouse Server {#using-the-engine-in-the-clickhouse-server}
+Syntax: `URL(URL, Format)`
+
+## Usage {#using-the-engine-in-the-clickhouse-server}

 The `format` must be one that ClickHouse can use in
 `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see
@ -24,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support

 You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.

-**Example:**
+## Example {#example}

 **1.** Create a `url_engine_table` table on the server:

--- a/docs/en/engines/table-engines/special/view.md
+++ b/docs/en/engines/table-engines/special/view.md
@ -3,7 +3,7 @@ toc_priority: 42
 toc_title: View
 ---

-# View {#table_engines-view}
+# View Table Engine {#table_engines-view}

 Used for implementing views (for more information, see the `CREATE VIEW` query). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query).

--- a/docs/en/faq/index.md
+++ b/docs/en/faq/index.md
@ -1,9 +1,8 @@
 ---
 toc_folder_title: F.A.Q.
+toc_hidden: true
 toc_priority: 76
 toc_title: hidden
-toc_hidden: true
 ---

-
 {## [Original article](https://clickhouse.tech/docs/en/faq) ##}
--- a/docs/en/getting-started/example-datasets/index.md
+++ b/docs/en/getting-started/example-datasets/index.md
@ -9,12 +9,12 @@ toc_title: Introduction
 This section describes how to obtain example datasets and import them into ClickHouse.
 For some datasets example queries are also available.

-   [Anonymized Yandex.Metrica Dataset](metrica.md)
-   [Star Schema Benchmark](star-schema.md)
-   [WikiStat](wikistat.md)
-   [Terabyte of Click Logs from Criteo](criteo.md)
-   [AMPLab Big Data Benchmark](amplab-benchmark.md)
-   [New York Taxi Data](nyc-taxi.md)
-   [OnTime](ontime.md)
+-   [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
+-   [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
+-   [WikiStat](../../getting-started/example-datasets/wikistat.md)
+-   [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
+-   [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
+-   [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
+-   [OnTime](../../getting-started/example-datasets/ontime.md)

 [Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->
--- a/docs/en/getting-started/example-datasets/metrica.md
+++ b/docs/en/getting-started/example-datasets/metrica.md
@ -7,7 +7,7 @@ toc_title: Yandex.Metrica Data

 Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section.

-The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
+The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits\_100m\_obfuscated\_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits\_100m\_obfuscated\_v1.tar.xz.

 ## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions}

--- a/docs/en/getting-started/index.md
+++ b/docs/en/getting-started/index.md
@ -7,9 +7,9 @@ toc_title: hidden

 # Getting Started {#getting-started}

-If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](install.md). After that you can:
+If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). After that you can:

-   [Go through detailed tutorial](tutorial.md)
-   [Experiment with example datasets](example-datasets/ontime.md)
+-   [Go through detailed tutorial](../getting-started/tutorial.md)
+-   [Experiment with example datasets](../getting-started/example-datasets/ontime.md)

 [Original article](https://clickhouse.tech/docs/en/getting_started/) <!--hide-->
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@ -64,7 +64,7 @@ You can also download and install packages manually from [here](https://repo.cli

 It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.

-The required version can be downloaded with `curl` or `wget` from repository https://repo.yandex.ru/clickhouse/tgz/.
+The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.tech/tgz/.
 After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version:

 ``` bash
--- a/docs/en/getting-started/playground.md
+++ b/docs/en/getting-started/playground.md
@ -6,13 +6,13 @@ toc_title: Playground
 # ClickHouse Playground {#clickhouse-playground}

 [ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
-Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There's also a selection of ClickHouse LTS releases to experiment with.
+Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with.

 ClickHouse Playground gives the experience of an m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).

 You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).

-## Credentials
+## Credentials {#credentials}

 | Parameter           | Value                                   |
 |:--------------------|:----------------------------------------|
@ -23,13 +23,13 @@ You can make queries to playground using any HTTP client, for example [curl](htt

 There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):

-* 20.3 LTS: `play-api-v20-3.clickhouse.tech`
-* 19.14 LTS: `play-api-v19-14.clickhouse.tech`
+-   20.3 LTS: `play-api-v20-3.clickhouse.tech`
+-   19.14 LTS: `play-api-v19-14.clickhouse.tech`

 !!! note "Note"
    All these endpoints require a secure TLS connection.

-## Limitations
+## Limitations {#limitations}

 The queries are executed as a read-only user. It implies some limitations:

@ -37,12 +37,12 @@ The queries are executed as a read-only user. It implies some limitations:
 -   INSERT queries are not allowed

 The following settings are also enforced:
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time)
+- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
+- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
+- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
+- [max\_execution\_time=60000](../operations/settings/query_complexity/#max-execution-time)

-## Examples
+## Examples {#examples}

 HTTPS endpoint example with `curl`:

@ -51,11 +51,12 @@ curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&us
 ```

 TCP endpoint example with [CLI](../interfaces/cli.md):
+
 ``` bash
 clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'"
 ```

-## Implementation Details
+## Implementation Details {#implementation-details}

 ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md).
 The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground; both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add an extra layer of protection and improve global connectivity.
--- a/docs/en/getting-started/tutorial.md
+++ b/docs/en/getting-started/tutorial.md
@ -11,7 +11,7 @@ By going through this tutorial, you’ll learn how to set up a simple ClickHouse

 ## Single Node Setup {#single-node-setup}

-To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](install.md#install-from-deb-packages) or [rpm](install.md#from-rpm-packages) packages, but there are [alternatives](install.md#from-docker-image) for the operating systems that do no support them.
+To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.

 For example, you have chosen `deb` packages and executed:

@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv

 ## Import Sample Dataset {#import-sample-dataset}

-Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
+Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.

 ### Download and Extract Table Data {#download-and-extract-table-data}

--- a/docs/en/guides/apply-catboost-model.md
+++ b/docs/en/guides/apply-catboost-model.md
@ -232,6 +232,6 @@ FROM
 ```

 !!! note "Note"
-    More info about [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
+    More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.

 [Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide-->
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`Subproject commit a49b4e0e2696a4b8ef286a5b9538d1cbe8490509`
				`@ -0,0 +1 @@`
				`Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28`