Merge remote-tracking branch 'origin/master' into HEAD

2024-11-24 08:32:02 +00:00 · 2020-05-25 06:21:27 +03:00 · 2020-05-25 06:21:27 +03:00 · f1fb7245e5
commit f1fb7245e5
parent 09129a6d21 bf3f76be7f
460 changed files with 5797 additions and 3093 deletions
--- a/.arcignore
+++ b/.arcignore
@ -0,0 +1,12 @@
+# .arcignore is the same as .gitignore but for Arc VCS.
+# Arc VCS is a proprietary VCS in Yandex that is very similar to Git
+# from the user perspective but with the following differences:
+# 1. Data is stored in distributed object storage.
+# 2. Local copy works via FUSE without downloading all the objects.
+# For this reason, it is better suited for huge monorepositories that can be found in large companies (e.g. Yandex, Google).
+# As ClickHouse developers, we don't use Arc as a VCS (we use Git).
+# But the ClickHouse source code is also mirrored into an internal monorepository and our colleagues are using Arc.
+# You can read more about Arc here: https://habr.com/en/company/yandex/blog/482926/
+
+# Repository is synchronized without 3rd-party submodules.
+contrib
--- a/.clang-tidy
+++ b/.clang-tidy
@ -9,7 +9,7 @@ Checks: '-*,
    misc-unused-alias-decls,
    misc-unused-parameters,
    misc-unused-using-decls,
-    
+
    modernize-avoid-bind,
    modernize-loop-convert,
    modernize-make-shared,
@ -33,7 +33,7 @@ Checks: '-*,
    performance-no-automatic-move,
    performance-trivially-destructible,
    performance-unnecessary-copy-initialization,
-    
+
    readability-avoid-const-params-in-decls,
    readability-const-return-type,
    readability-container-size-empty,
@ -58,7 +58,7 @@ Checks: '-*,
    readability-simplify-boolean-expr,
    readability-inconsistent-declaration-parameter-name,
    readability-identifier-naming,
-    
+
    bugprone-undelegated-constructor,
    bugprone-argument-comment,
    bugprone-bad-signal-to-kill-thread,
@ -102,7 +102,7 @@ Checks: '-*,
    bugprone-unused-return-value,
    bugprone-use-after-move,
    bugprone-virtual-near-miss,
-    
+
    cert-dcl21-cpp,
    cert-dcl50-cpp,
    cert-env33-c,
@ -112,7 +112,7 @@ Checks: '-*,
    cert-mem57-cpp,
    cert-msc50-cpp,
    cert-oop58-cpp,
-    
+
    google-build-explicit-make-pair,
    google-build-namespaces,
    google-default-arguments,
@ -121,9 +121,9 @@ Checks: '-*,
    google-readability-avoid-underscore-in-googletest-name,
    google-runtime-int,
    google-runtime-operator,
-    
+
    hicpp-exception-baseclass,
-    
+
    clang-analyzer-core.CallAndMessage,
    clang-analyzer-core.DivideZero,
    clang-analyzer-core.NonNullParamChecker,
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -9,7 +9,7 @@ Changelog category (leave one):
 - Build/Testing/Packaging Improvement
 - Documentation (changelog entry is not required)
 - Other
- Non-significant (changelog entry is not required)
+- Not for changelog (changelog entry is not required)


 Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
--- a/.gitmodules
+++ b/.gitmodules
@ -157,3 +157,6 @@
 [submodule "contrib/openldap"]
 	path = contrib/openldap
 	url = https://github.com/openldap/openldap.git
+[submodule "contrib/fmtlib"]
+	path = contrib/fmtlib
+	url = https://github.com/fmtlib/fmt.git
--- a/base/common/CMakeLists.txt
+++ b/base/common/CMakeLists.txt
@ -79,6 +79,7 @@ target_link_libraries (common
        Poco::Util
        Poco::Foundation
        replxx
+        fmt

    PRIVATE
        cctz
--- a/base/common/logger_useful.h
+++ b/base/common/logger_useful.h
@ -2,16 +2,14 @@

 /// Macros for convenient usage of Poco logger.

-#include <sstream>
+#include <fmt/format.h>
+#include <fmt/ostream.h>
 #include <Poco/Logger.h>
 #include <Poco/Message.h>
-#include <Poco/Version.h>
 #include <Common/CurrentThread.h>

-#ifndef QUERY_PREVIEW_LENGTH
-#define QUERY_PREVIEW_LENGTH 160
-#endif

+/// TODO Remove this.
 using Poco::Logger;
 using Poco::Message;
 using DB::LogsLevel;
@ -19,21 +17,20 @@ using DB::CurrentThread;

 /// Logs a message to a specified logger with that level.

-#define LOG_SIMPLE(logger, message, priority, PRIORITY) do                        \
+#define LOG_IMPL(logger, priority, PRIORITY, ...) do                    \
 {                                                                                 \
    const bool is_clients_log = (CurrentThread::getGroup() != nullptr) &&         \
            (CurrentThread::getGroup()->client_logs_level >= (priority));         \
    if ((logger)->is((PRIORITY)) || is_clients_log)                               \
    {                                                                             \
-        std::stringstream oss_internal_rare;                                      \
-        oss_internal_rare << message;                                             \
+        std::string formatted_message = fmt::format(__VA_ARGS__);                 \
        if (auto channel = (logger)->getChannel())                                \
        {                                                                         \
            std::string file_function;                                            \
            file_function += __FILE__;                                            \
            file_function += "; ";                                                \
            file_function += __PRETTY_FUNCTION__;                                 \
-            Message poco_message((logger)->name(), oss_internal_rare.str(),       \
+            Message poco_message((logger)->name(), formatted_message,             \
                                 (PRIORITY), file_function.c_str(), __LINE__);    \
            channel->log(poco_message);                                           \
        }                                                                         \
@ -41,10 +38,9 @@ using DB::CurrentThread;
 } while (false)


-#define LOG_TRACE(logger, message)   LOG_SIMPLE(logger, message, LogsLevel::trace, Message::PRIO_TRACE)
-#define LOG_DEBUG(logger, message)   LOG_SIMPLE(logger, message, LogsLevel::debug, Message::PRIO_DEBUG)
-#define LOG_INFO(logger, message)    LOG_SIMPLE(logger, message, LogsLevel::information, Message::PRIO_INFORMATION)
-#define LOG_WARNING(logger, message) LOG_SIMPLE(logger, message, LogsLevel::warning, Message::PRIO_WARNING)
-#define LOG_ERROR(logger, message)   LOG_SIMPLE(logger, message, LogsLevel::error, Message::PRIO_ERROR)
-#define LOG_FATAL(logger, message)   LOG_SIMPLE(logger, message, LogsLevel::error, Message::PRIO_FATAL)
-
+#define LOG_TRACE(logger, ...)   LOG_IMPL(logger, LogsLevel::trace, Message::PRIO_TRACE, __VA_ARGS__)
+#define LOG_DEBUG(logger, ...)   LOG_IMPL(logger, LogsLevel::debug, Message::PRIO_DEBUG, __VA_ARGS__)
+#define LOG_INFO(logger, ...)    LOG_IMPL(logger, LogsLevel::information, Message::PRIO_INFORMATION, __VA_ARGS__)
+#define LOG_WARNING(logger, ...) LOG_IMPL(logger, LogsLevel::warning, Message::PRIO_WARNING, __VA_ARGS__)
+#define LOG_ERROR(logger, ...)   LOG_IMPL(logger, LogsLevel::error, Message::PRIO_ERROR, __VA_ARGS__)
+#define LOG_FATAL(logger, ...)   LOG_IMPL(logger, LogsLevel::error, Message::PRIO_FATAL, __VA_ARGS__)
--- a/base/common/ya.make
+++ b/base/common/ya.make
@ -24,6 +24,7 @@ PEERDIR(
    contrib/libs/cxxsupp/libcxx-filesystem
    contrib/libs/poco/Net
    contrib/libs/poco/Util
+    contrib/libs/fmt
    contrib/restricted/boost
    contrib/restricted/cityhash-1.0.2
 )
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@ -180,7 +180,7 @@ public:
            // levels and more info, but for completeness we log all signals
            // here at trace level.
            // Don't use strsignal here, because it's not thread-safe.
-            LOG_TRACE(log, "Received signal " << sig);
+            LOG_TRACE(log, "Received signal {}", sig);

            if (sig == Signals::StopThread)
            {
@ -236,7 +236,7 @@ private:

    void onTerminate(const std::string & message, UInt32 thread_num) const
    {
-        LOG_FATAL(log, "(version " << VERSION_STRING << VERSION_OFFICIAL << ") (from thread " << thread_num << ") " << message);
+        LOG_FATAL(log, "(version {}{}) (from thread {}) {}", VERSION_STRING, VERSION_OFFICIAL, thread_num, message);
    }

    void onFault(
@ -257,9 +257,9 @@ private:
                message << " (no query)";
            else
                message << " (query_id: " << query_id << ")";
-            message << " Received signal " << strsignal(sig) << " (" << sig << ")" << ".";
+            message << " Received signal " << strsignal(sig) << " (" << sig << ").";

-            LOG_FATAL(log, message.rdbuf());
+            LOG_FATAL(log, message.str());
        }

        LOG_FATAL(log, signalToErrorMessage(sig, info, context));
@ -274,7 +274,7 @@ private:
            for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
                bare_stacktrace << ' ' << stack_trace.getFrames()[i];

-            LOG_FATAL(log, bare_stacktrace.rdbuf());
+            LOG_FATAL(log, bare_stacktrace.str());
        }

        /// Write symbolized stack trace line by line for better grep-ability.
@ -302,7 +302,7 @@ static void sanitizerDeathCallback()
            message << " (query_id: " << query_id << ")";
        message << " Sanitizer trap.";

-        LOG_FATAL(log, message.rdbuf());
+        LOG_FATAL(log, message.str());
    }

    /// Just in case print our own stack trace. In case when llvm-symbolizer does not work.
@ -314,7 +314,7 @@ static void sanitizerDeathCallback()
        for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
            bare_stacktrace << ' ' << stack_trace.getFrames()[i];

-        LOG_FATAL(log, bare_stacktrace.rdbuf());
+        LOG_FATAL(log, bare_stacktrace.str());
    }

    /// Write symbolized stack trace line by line for better grep-ability.
@ -379,7 +379,7 @@ static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path
    }
    catch (...)
    {
-        LOG_WARNING(logger, __PRETTY_FUNCTION__ << ": when creating " << path << ", " << DB::getCurrentExceptionMessage(true));
+        LOG_WARNING(logger, "{}: when creating {}, {}", __PRETTY_FUNCTION__, path, DB::getCurrentExceptionMessage(true));
    }
    return false;
 }
@ -498,11 +498,10 @@ void debugIncreaseOOMScore()
    }
    catch (const Poco::Exception & e)
    {
-        LOG_WARNING(&Logger::root(), "Failed to adjust OOM score: '" +
-                    e.displayText() + "'.");
+        LOG_WARNING(&Logger::root(), "Failed to adjust OOM score: '{}'.", e.displayText());
        return;
    }
-    LOG_INFO(&Logger::root(), "Set OOM score adjustment to " + new_score);
+    LOG_INFO(&Logger::root(), "Set OOM score adjustment to {}", new_score);
 }
 #else
 void debugIncreaseOOMScore() {}
@ -734,7 +733,7 @@ void BaseDaemon::handleNotification(Poco::TaskFailedNotification *_tfn)
    task_failed = true;
    Poco::AutoPtr<Poco::TaskFailedNotification> fn(_tfn);
    Logger *lg = &(logger());
-    LOG_ERROR(lg, "Task '" << fn->task()->name() << "' failed. Daemon is shutting down. Reason - " << fn->reason().displayText());
+    LOG_ERROR(lg, "Task '{}' failed. Daemon is shutting down. Reason - {}", fn->task()->name(), fn->reason().displayText());
    ServerApplication::terminate();
 }

@ -850,7 +849,7 @@ void BaseDaemon::handleSignal(int signal_id)
 void BaseDaemon::onInterruptSignals(int signal_id)
 {
    is_cancelled = true;
-    LOG_INFO(&logger(), "Received termination signal (" << strsignal(signal_id) << ")");
+    LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));

    if (sigint_signals_counter >= 2)
    {
--- a/base/daemon/GraphiteWriter.h
+++ b/base/daemon/GraphiteWriter.h
@ -52,8 +52,7 @@ private:
        }
        catch (const Poco::Exception & e)
        {
-            LOG_WARNING(&Poco::Util::Application::instance().logger(),
-                        "Fail to write to Graphite " << host << ":" << port << ". e.what() = " << e.what() << ", e.message() = " << e.message());
+            LOG_WARNING(&Poco::Util::Application::instance().logger(), "Fail to write to Graphite {}:{}. e.what() = {}, e.message() = {}", host, port, e.what(), e.message());
        }
    }

--- a/base/ext/chrono_io.h
+++ b/base/ext/chrono_io.h
@ -4,6 +4,7 @@
 #include <ctime>
 #include <string>
 #include <iomanip>
+#include <sstream>


 namespace ext
--- a/cmake/warnings.cmake
+++ b/cmake/warnings.cmake
@ -162,4 +162,10 @@ elseif (COMPILER_GCC)
    add_cxx_compile_options(-Wunused)
    # Warn if vector operation is not implemented via SIMD capabilities of the architecture
    add_cxx_compile_options(-Wvector-operation-performance)
+
+    # XXX: gcc10 gets stuck with -Wsequence-point enabled while compiling GatherUtils code
+    # (anyway there are builds with clang, that will warn)
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
+        add_cxx_compile_options(-Wno-sequence-point)
+    endif()
 endif ()
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -317,3 +317,5 @@ endif()
 if (USE_FASTOPS)
    add_subdirectory (fastops-cmake)
 endif()
+
+add_subdirectory (fmtlib-cmake)
--- a/contrib/fmtlib
+++ b/contrib/fmtlib
@ -0,0 +1 @@
+Subproject commit 297c3b2ed551a4989826fc8c4780bf533e964bd9
--- a/contrib/fmtlib-cmake/CMakeLists.txt
+++ b/contrib/fmtlib-cmake/CMakeLists.txt
@ -0,0 +1,20 @@
+set (SRCS
+    ../fmtlib/src/format.cc
+    ../fmtlib/src/os.cc
+
+    ../fmtlib/include/fmt/chrono.h
+    ../fmtlib/include/fmt/color.h
+    ../fmtlib/include/fmt/compile.h
+    ../fmtlib/include/fmt/core.h
+    ../fmtlib/include/fmt/format.h
+    ../fmtlib/include/fmt/format-inl.h
+    ../fmtlib/include/fmt/locale.h
+    ../fmtlib/include/fmt/os.h
+    ../fmtlib/include/fmt/ostream.h
+    ../fmtlib/include/fmt/posix.h
+    ../fmtlib/include/fmt/printf.h
+    ../fmtlib/include/fmt/ranges.h
+)
+
+add_library(fmt ${SRCS})
+target_include_directories(fmt SYSTEM PUBLIC ../fmtlib/include)
--- a/contrib/jemalloc-cmake/CMakeLists.txt
+++ b/contrib/jemalloc-cmake/CMakeLists.txt
@ -9,6 +9,17 @@ if (ENABLE_JEMALLOC)
    option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED})

    if (USE_INTERNAL_JEMALLOC)
+        # ThreadPool selects jobs randomly, so there can be threads that have
+        # performed some memory-heavy task before and will be inactive for some time,
+        # but until such a thread becomes active again, the memory will not be freed, since by
+        # default each thread has its own arena, but there should be no more than
+        # 4*CPU arenas (see opt.narenas description).
+        #
+        # By enabling percpu_arena, the number of arenas is limited to the number of CPUs and hence
+        # this problem should go away.
+        set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu" CACHE STRING "Change default configuration string of JEMalloc" )
+        message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}")
+
        set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc")

        set (SRCS
@ -52,11 +63,19 @@ if (ENABLE_JEMALLOC)
        add_library(jemalloc ${SRCS})
        target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include)
        target_include_directories(jemalloc SYSTEM PUBLIC include)
+
+        set(JEMALLOC_INCLUDE)
        if (ARCH_AMD64)
-            target_include_directories(jemalloc SYSTEM PUBLIC include_linux_x86_64)
+            set(JEMALLOC_INCLUDE_PREFIX include_linux_x86_64)
        elseif (ARCH_ARM)
-            target_include_directories(jemalloc SYSTEM PUBLIC include_linux_aarch64)
+            set(JEMALLOC_INCLUDE_PREFIX include_linux_aarch64)
        endif ()
+        target_include_directories(jemalloc SYSTEM PUBLIC
+            ${JEMALLOC_INCLUDE_PREFIX})
+        configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in
+            ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h)
+        target_include_directories(jemalloc SYSTEM PRIVATE
+            ${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal)

        target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)

--- a/contrib/jemalloc-cmake/include_linux_aarch64/README
+++ b/contrib/jemalloc-cmake/include_linux_aarch64/README
@ -5,3 +5,4 @@ Added #define GNU_SOURCE
 Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not.
 Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard.
 Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5.
+Added JEMALLOC_CONFIG_MALLOC_CONF substitution
--- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -369,7 +369,7 @@
 /* #undef JEMALLOC_EXPORT */

 /* config.malloc_conf options string. */
-#define JEMALLOC_CONFIG_MALLOC_CONF ""
+#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"

 /* If defined, jemalloc takes the malloc/free/etc. symbol names. */
 #define JEMALLOC_IS_MALLOC 1
--- a/contrib/jemalloc-cmake/include_linux_x86_64/README
+++ b/contrib/jemalloc-cmake/include_linux_x86_64/README
@ -5,3 +5,4 @@ Added #define GNU_SOURCE
 Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not.
 Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard.
 Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5.
+Added JEMALLOC_CONFIG_MALLOC_CONF substitution
--- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -360,7 +360,7 @@
 /* #undef JEMALLOC_EXPORT */

 /* config.malloc_conf options string. */
-#define JEMALLOC_CONFIG_MALLOC_CONF ""
+#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"

 /* If defined, jemalloc takes the malloc/free/etc. symbol names. */
 #define JEMALLOC_IS_MALLOC 1
--- a/debian/rules
+++ b/debian/rules
@ -24,6 +24,10 @@ DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT)

 ifndef ENABLE_TESTS
    CMAKE_FLAGS += -DENABLE_TESTS=0
+else
+# To export binaries from the deb build we do not strip them. No need to run tests in the deb build as we run them in CI.
+    DEB_BUILD_OPTIONS+= nocheck
+    DEB_BUILD_OPTIONS+= nostrip
 endif

 ifndef MAKE_TARGET
@ -88,14 +92,19 @@ override_dh_auto_build:
 	$(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)

 override_dh_auto_test:
+ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
 	cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server
+endif

 override_dh_clean:
 	rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs
-	dh_clean -X contrib
+	dh_clean # -X contrib

 override_dh_strip:
+#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options
+ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS)))
 	dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg
+endif

 override_dh_install:
 	# Making docs
--- a/docker/packager/deb/Dockerfile
+++ b/docker/packager/deb/Dockerfile
@ -5,6 +5,7 @@ RUN apt-get --allow-unauthenticated update -y && apt-get install --yes wget gnup
 RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
 RUN echo "deb [trusted=yes] http://apt.llvm.org/eoan/ llvm-toolchain-eoan-10 main" >> /etc/apt/sources.list

+
 RUN apt-get --allow-unauthenticated update -y \
    && env DEBIAN_FRONTEND=noninteractive \
        apt-get --allow-unauthenticated install --yes --no-install-recommends \
@ -17,6 +18,14 @@ RUN apt-get --allow-unauthenticated update -y \
            apt-transport-https \
            ca-certificates

+# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
+# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
+# Significantly increases deb packaging speed and is compatible with old systems.
+RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
+RUN chmod +x dpkg-deb
+RUN cp dpkg-deb /usr/bin
+
+
 # Libraries from OS are only needed to test the "unbundled" build (that is not used in production).
 RUN apt-get --allow-unauthenticated update -y \
    && env DEBIAN_FRONTEND=noninteractive \
@ -74,12 +83,6 @@ RUN apt-get --allow-unauthenticated update -y \
            libldap2-dev


-# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
-# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
-# Significantly increase deb packaging speed and compatible with old systems
-RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
-RUN chmod +x dpkg-deb
-RUN cp dpkg-deb /usr/bin

 # This symlink required by gcc to find lld compiler
 RUN ln -s /usr/bin/lld-10 /usr/bin/ld.lld
--- a/docker/packager/deb/build.sh
+++ b/docker/packager/deb/build.sh
@ -10,5 +10,16 @@ mv *.changes /output
 mv *.buildinfo /output
 mv /*.rpm /output ||: # if exists
 mv /*.tgz /output ||: # if exists
+
+if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;}
+then
+  echo Place $BINARY_OUTPUT to output
+  mkdir /output/binary ||: # if exists
+  mv /build/obj-*/programs/clickhouse* /output/binary
+  if [ "$BINARY_OUTPUT" = "tests" ]
+  then
+    mv /build/obj-*/src/unit_tests_dbms /output/binary
+  fi
+fi
 ccache --show-stats ||:
 ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
--- a/docker/packager/packager
+++ b/docker/packager/packager
@ -54,7 +54,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache

    subprocess.check_call(cmd, shell=True)

-def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage):
+def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
    CLANG_PREFIX = "clang"
    DARWIN_SUFFIX = "-darwin"
    ARM_SUFFIX = "-aarch64"
@ -131,6 +131,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
    if alien_pkgs:
        result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'")

+    if with_binaries == "programs":
+        result.append('BINARY_OUTPUT=programs')
+    elif with_binaries == "tests":
+        result.append('ENABLE_TESTS=1')
+        result.append('BINARY_OUTPUT=tests')
+        cmake_flags.append('-DENABLE_TESTS=1')
+        cmake_flags.append('-DUSE_GTEST=1')
+
    if unbundled:
        # TODO: fix build with ENABLE_RDKAFKA
        cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0')
@ -179,6 +187,7 @@ if __name__ == "__main__":
    parser.add_argument("--official", action="store_true")
    parser.add_argument("--alien-pkgs", nargs='+', default=[])
    parser.add_argument("--with-coverage", action="store_true")
+    parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="")

    args = parser.parse_args()
    if not os.path.isabs(args.output_dir):
@ -195,6 +204,12 @@ if __name__ == "__main__":
    if args.alien_pkgs and not image_type == "deb":
        raise Exception("Can add alien packages only in deb build")

+    if args.with_binaries != "" and not image_type == "deb":
+        raise Exception("Can add additional binaries only in deb build")
+
+    if args.with_binaries != "" and image_type == "deb":
+        logging.info("Should place {} to output".format(args.with_binaries))
+
    dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile")
    if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image:
        if not pull_image(image_name) or args.force_build_image:
@ -202,6 +217,6 @@ if __name__ == "__main__":
    env_prepared = parse_env_variables(
        args.build_type, args.compiler, args.sanitizer, args.package_type, image_type,
        args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy,
-        args.version, args.author, args.official, args.alien_pkgs, args.with_coverage)
+        args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries)
    run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir)
    logging.info("Output placed into {}".format(args.output_dir))
--- a/docker/test/integration/compose/docker_compose_hdfs.yml
+++ b/docker/test/integration/compose/docker_compose_hdfs.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'
 services:
    hdfs1:
        image: sequenceiq/hadoop-docker:2.7.0
--- a/docker/test/integration/compose/docker_compose_kafka.yml
+++ b/docker/test/integration/compose/docker_compose_kafka.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'

 services:
  kafka_zookeeper:
--- a/docker/test/integration/compose/docker_compose_minio.yml
+++ b/docker/test/integration/compose/docker_compose_minio.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'

 services:
  minio1:
--- a/docker/test/integration/compose/docker_compose_mongo.yml
+++ b/docker/test/integration/compose/docker_compose_mongo.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'
 services:
    mongo1:
        image: mongo:3.6
@ -8,3 +8,4 @@ services:
            MONGO_INITDB_ROOT_PASSWORD: clickhouse
        ports:
          - 27018:27017
+        command: --profile=2 --verbose
--- a/docker/test/integration/compose/docker_compose_mysql.yml
+++ b/docker/test/integration/compose/docker_compose_mysql.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'
 services:
    mysql1:
        image: mysql:5.7
--- a/docker/test/integration/compose/docker_compose_net.yml
+++ b/docker/test/integration/compose/docker_compose_net.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'
 networks:
  default:
    driver: bridge
--- a/docker/test/integration/compose/docker_compose_postgres.yml
+++ b/docker/test/integration/compose/docker_compose_postgres.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'
 services:
    postgres1:
        image: postgres
--- a/docker/test/integration/compose/docker_compose_redis.yml
+++ b/docker/test/integration/compose/docker_compose_redis.yml
@ -1,4 +1,4 @@
-version: '2.2'
+version: '2.3'
 services:
    redis1:
        image: redis
--- a/docker/test/integration/compose/docker_compose_zookeeper.yml
+++ b/docker/test/integration/compose/docker_compose_zookeeper.yml
@ -1,25 +1,47 @@
-version: '2.2'
+version: '2.3'
 services:
    zoo1:
        image: zookeeper:3.4.12
        restart: always
        environment:
            ZOO_TICK_TIME: 500
-            ZOO_MY_ID: 1
            ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
-
+            ZOO_MY_ID: 1
+            JVMFLAGS: -Dzookeeper.forceSync=no
+        volumes:
+            - type: ${ZK_FS:-tmpfs}
+              source: ${ZK_DATA1:-}
+              target: /data
+            - type: ${ZK_FS:-tmpfs}
+              source: ${ZK_DATA_LOG1:-}
+              target: /datalog
    zoo2:
        image: zookeeper:3.4.12
        restart: always
        environment:
            ZOO_TICK_TIME: 500
-            ZOO_MY_ID: 2
            ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
-
+            ZOO_MY_ID: 2
+            JVMFLAGS: -Dzookeeper.forceSync=no
+        volumes:
+            - type: ${ZK_FS:-tmpfs}
+              source: ${ZK_DATA2:-}
+              target: /data
+            - type: ${ZK_FS:-tmpfs}
+              source: ${ZK_DATA_LOG2:-}
+              target: /datalog
    zoo3:
        image: zookeeper:3.4.12
        restart: always
        environment:
            ZOO_TICK_TIME: 500
-            ZOO_MY_ID: 3
            ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
+            ZOO_MY_ID: 3
+            JVMFLAGS: -Dzookeeper.forceSync=no
+        volumes:
+            - type: ${ZK_FS:-tmpfs}
+              source: ${ZK_DATA3:-}
+              target: /data
+            - type: ${ZK_FS:-tmpfs}
+              source: ${ZK_DATA_LOG3:-}
+              target: /datalog
--- a/docker/test/performance-comparison/performance_comparison.md
+++ b/docker/test/performance-comparison/performance_comparison.md
@ -76,6 +76,14 @@ directory.
 More complex setup is possible, but inconvenient and requires some scripting.
 See `manual-run.sh` for inspiration.

+#### Compare two published releases
+Use `compare-releases.sh`. It will download and extract static + dbg + test
+packages for both releases, and then call the main comparison script
+`compare.sh`, starting from `configure` stage.
+```
+compare-releases.sh 19.16.19.85 20.4.2.9
+```
+

 #### Statistical considerations
 Generating randomization distribution for medians is tricky. Suppose we have N
--- a/docker/test/performance-comparison/compare-releases.sh
+++ b/docker/test/performance-comparison/compare-releases.sh
@ -0,0 +1,82 @@
+#!/bin/bash
+set -ex
+set -o pipefail
+trap "exit" INT TERM
+trap 'kill $(jobs -pr) ||:' EXIT
+
+left_version=${1}
+right_version=${2}
+
+if [ "$left_version" == "" ] || [ "$right_version" == "" ]
+then
+    >&2 echo Usage: $(basename "$0") left_version right_version
+    exit 1
+fi
+
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+repo_dir=${repo_dir:-$(readlink -f "$script_dir/../../..")}
+
+function download_package() # (version, path)
+{
+    version="$1"
+    path="$2"
+    cd "$path"
+    wget -nv -nd -nc "https://repo.clickhouse.tech/deb/stable/main/clickhouse-common-static-dbg_${version}_amd64.deb" ||:
+    wget -nv -nd -nc "https://repo.clickhouse.tech/deb/stable/main/clickhouse-common-static_${version}_amd64.deb" ||:
+    wget -nv -nd -nc "https://repo.clickhouse.tech/deb/stable/main/clickhouse-test_${version}_all.deb" ||:
+    mkdir tmp ||:
+    for x in *.deb; do dpkg-deb -x "$x" tmp ; done
+    mv tmp/usr/bin/clickhouse ./clickhouse
+    mkdir .debug
+    mv tmp/usr/lib/debug/usr/bin/clickhouse .debug/clickhouse
+    mv tmp/usr/share/clickhouse-test/performance .
+    ln -s clickhouse clickhouse-local
+    ln -s clickhouse clickhouse-client
+    ln -s clickhouse clickhouse-server
+    rm -rf tmp
+}
+
+function download
+{
+    rm -r left right db0 ||:
+    mkdir left right db0 ||:
+
+    "$script_dir/download.sh" ||: &
+
+    download_package "$left_version" left &
+    download_package "$right_version" right &
+
+    wait
+
+    rm -rf {right,left}/tmp
+}
+
+function configure
+{
+    # Configs
+    cp -av "$script_dir/config" right
+    cp -av "$script_dir/config" left
+    cp -av "$repo_dir"/programs/server/config* right/config
+    cp -av "$repo_dir"/programs/server/user* right/config
+    cp -av "$repo_dir"/programs/server/config* left/config
+    cp -av "$repo_dir"/programs/server/user* left/config
+}
+
+function run
+{
+    left/clickhouse-local --query "select * from system.build_options format PrettySpace" | sed 's/ *$//' | fold -w 80 -s > left-commit.txt
+    right/clickhouse-local --query "select * from system.build_options format PrettySpace" | sed 's/ *$//' | fold -w 80 -s > right-commit.txt
+
+    PATH=right:"$PATH" \
+        CHPC_TEST_PATH=right/performance \
+        stage=configure \
+        "$script_dir/compare.sh" &> >(tee compare.log)
+}
+
+download
+configure
+run
+
+rm -f output.7z # -f: the script runs under `set -e`, so a missing archive from a previous run must not abort it
+7z a output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs}
+
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -133,7 +133,7 @@ function run_tests
    fi

    # Delete old report files.
-    for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv
+    for x in {test-times,wall-clock-times}.tsv
    do
        rm -v "$x" ||:
        touch "$x"
@ -155,9 +155,6 @@ function run_tests
        TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
        # the grep is to filter out set -x output and keep only time output
        { time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; } 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" || continue
-
-        # The test completed with zero status, so we treat stderr as warnings
-        mv "$test_name-err.log" "$test_name-warn.log"
    done

    unset TIMEFORMAT
@ -217,69 +214,137 @@ function get_profiles
    clickhouse-client --port 9002 --query "select 1"
 }

+# For each exported addresses/log TSV of both servers, write a "<file>.columns"
+# file holding a comma-separated list of '"name" type' pairs built from the
+# first two lines of that TSV. These files are later pasted into the structure
+# argument of file().
+function build_log_column_definitions
+{
+# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
+# absolutely atrocious way. This should be done by the file() function itself.
+for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
+do
+    # First input: line 1 of the file split on tabs, one double-quoted item per
+    # line. Second input: line 2 split the same way. paste joins the two
+    # pairwise with a space, tr turns the newlines into commas, and the last
+    # sed drops the trailing comma.
+    paste -d' ' \
+        <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
+        <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
+        | tr '\n' ', ' | sed 's/,$//' > "$x.columns"
+done
+}
+
 # Build and analyze randomization distribution for all queries.
 function analyze_queries
 {
-rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv client-times.tsv report-thresholds.tsv ||:
+rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||:
+rm -rf analyze ||:
+mkdir analyze analyze/tmp ||:
+
+build_log_column_definitions

 # Split the raw test output into files suitable for analysis.
 IFS=$'\n'
 for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print)
 do
    test_name=$(basename "$test_file" "-raw.tsv")
-    sed -n "s/^query\t//p" < "$test_file" > "$test_name-queries.tsv"
-    sed -n "s/^client-time/$test_name/p" < "$test_file" >> "client-times.tsv"
-    sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "report-thresholds.tsv"
-    sed -n "s/^skipped/$test_name/p" < "$test_file" >> "skipped-tests.tsv"
+    sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
+    sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv"
+    sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv"
+    sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv"
+    sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv"
 done
 unset IFS

+# for each query run, prepare array of metrics from query log
+clickhouse-local --query "
+create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
+    'test text, query_index int, query_id text, version UInt8, time float');
+
+create view left_query_log as select *
+    from file('left-query-log.tsv', TSVWithNamesAndTypes,
+        '$(cat "left-query-log.tsv.columns")');
+
+create view right_query_log as select *
+    from file('right-query-log.tsv', TSVWithNamesAndTypes,
+        '$(cat "right-query-log.tsv.columns")');
+
+create table query_metrics engine File(TSV, -- do not add header -- will parse with grep
+        'analyze/query-run-metrics.tsv')
+    as select
+        test, query_index, 0 run, version,
+        [
+            -- server-reported time
+            query_duration_ms / toFloat64(1000)
+            , toFloat64(memory_usage)
+            -- client-reported time
+            , query_runs.time
+        ] metrics
+    from (
+        select query_duration_ms, memory_usage, query_id, 0 version from left_query_log
+        union all
+        select query_duration_ms, memory_usage, query_id, 1 version from right_query_log
+    ) query_logs
+    right join query_runs
+    using (query_id, version)
+    order by test, query_index
+    ;
+"
+
 # This is a lateral join in bash... please forgive me.
-# We don't have arrayPermute(), so I have to make random permutations with 
+# We don't have arrayPermute(), so I have to make random permutations with
 # `order by rand`, and it becomes really slow if I do it for more than one
 # query. We also don't have lateral joins. So I just put all runs of each
 # query into a separate file, and then compute randomization distribution
 # for each file. I do this in parallel using GNU parallel.
+query_index=1
 IFS=$'\n'
-for test_file in $(find . -maxdepth 1 -name "*-queries.tsv" -print)
+for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
 do
-    test_name=$(basename "$test_file" "-queries.tsv")
-    query_index=1
-    for query in $(cut -d'	' -f1 "$test_file" | sort | uniq)
-    do
-        query_prefix="$test_name.q$query_index"
-        query_index=$((query_index + 1))
-        grep -F "$query	" "$test_file" > "$query_prefix.tmp"
-        printf "%s\0\n" \
-            "clickhouse-local \
-                --file \"$query_prefix.tmp\" \
-                --structure 'query text, run int, version UInt32, time float' \
-                --query \"$(cat "$script_dir/eqmed.sql")\" \
-                >> \"$test_name-report.tsv\"" \
-                2>> analyze-errors.log \
-            >> analyze-commands.txt
-    done
+    file="analyze/tmp/$(echo "$prefix" | sed 's/\t/_/g').tsv"
+    grep "^$prefix	" "analyze/query-run-metrics.tsv" > "$file" &
+    printf "%s\0\n" \
+        "clickhouse-local \
+            --file \"$file\" \
+            --structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
+            --query \"$(cat "$script_dir/eqmed.sql")\" \
+            >> \"analyze/query-reports.tsv\"" \
+            2>> analyze/errors.log \
+        >> analyze/commands.txt
 done
 wait
 unset IFS

-parallel --null < analyze-commands.txt
+parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
 }

 # Analyze results
 function report
 {
-
 rm -r report ||:
-mkdir report ||:
-
+mkdir report report/tmp ||:

 rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:

-cat analyze-errors.log >> report/errors.log ||:
+build_log_column_definitions
+
+cat analyze/errors.log >> report/errors.log ||:
 cat profile-errors.log >> report/errors.log ||:

 clickhouse-local --query "
+create view query_display_names as select * from
+    file('analyze/query-display-names.tsv', TSV,
+        'test text, query_index int, query_display_name text')
+    ;
+
+create table query_metric_stats engine File(TSVWithNamesAndTypes,
+        'report/query-metric-stats.tsv') as
+    select metric_name, left, right, diff, stat_threshold, test, query_index,
+        query_display_name
+    from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
+        right Array(float), diff Array(float), stat_threshold Array(float),
+        test text, query_index int') reports
+    left array join ['server_time', 'memory', 'client_time'] as metric_name,
+        left, right, diff, stat_threshold
+    left join query_display_names
+        on reports.test = query_display_names.test
+            and reports.query_index = query_display_names.query_index
+    ;
+
+-- Main statistics for queries -- query time as reported in query log.
 create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
    as select
        -- FIXME Comparison mode doesn't make sense for queries that complete
@ -296,53 +361,65 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
        
        left, right, diff, stat_threshold,
        if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
-        reports.test,
-        query
-    from
-        (
-            select *,
-                replaceAll(_file, '-report.tsv', '') test
-            from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text')
-        ) reports
-        left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds
-        using test
-        ;
+        test, query_index, query_display_name
+    from query_metric_stats
+    left join file('analyze/report-thresholds.tsv', TSV,
+            'test text, report_threshold float') thresholds
+        on query_metric_stats.test = thresholds.test
+    where metric_name = 'server_time'
+    order by test, query_index, metric_name
+    ;

 -- keep the table in old format so that we can analyze new and old data together
 create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
-    as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query
+    as select short, changed_fail, unstable_fail, left, right, diff,
+        stat_threshold, test, query_display_name query
    from queries
    ;

 -- save all test runs as JSON for the new comparison page
-create table all_query_funs_json engine File(JSON, 'report/all-query-runs.json') as
-    select test, query, versions_runs[1] runs_left, versions_runs[2] runs_right
+create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
+    select test, query_index, query_display_name query,
+        left, right, diff, stat_threshold, report_threshold,
+        versions_runs[1] runs_left, versions_runs[2] runs_right
    from (
        select
-            test, query,
+            test, query_index,
            groupArrayInsertAt(runs, version) versions_runs
        from (
            select
-                replaceAll(_file, '-queries.tsv', '') test,
-                query, version,
-                groupArray(time) runs
-            from file('*-queries.tsv', TSV, 'query text, run int, version UInt32, time float')
-            group by test, query, version
+                test, query_index, version,
+                groupArray(metrics[1]) runs
+            from file('analyze/query-run-metrics.tsv', TSV,
+                'test text, query_index int, run int, version UInt8, metrics Array(float)')
+            group by test, query_index, version
        )
-        group by test, query
-    )
+        group by test, query_index
+    ) runs
+    left join query_display_names
+        on runs.test = query_display_names.test
+            and runs.query_index = query_display_names.query_index
+    left join file('analyze/report-thresholds.tsv',
+            TSV, 'test text, report_threshold float') thresholds
+        on runs.test = thresholds.test
+    left join query_metric_stats
+        on runs.test = query_metric_stats.test
+            and runs.query_index = query_metric_stats.query_index
+    where
+        query_metric_stats.metric_name = 'server_time'
    ;

 create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
-    select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show
-    order by abs(diff) desc;
+    select left, right, diff, stat_threshold, changed_fail, test, query_display_name
+    from queries where changed_show order by abs(diff) desc;

 create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
-    select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show
-    order by stat_threshold desc;
+    select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
+    from queries where unstable_show order by stat_threshold desc;

-create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as
-    select query, test from queries where unstable_show or changed_show
+create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
+        'report/queries-for-flamegraph.tsv') as
+    select test, query_index from queries where unstable_show or changed_show
    ;

 create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
@ -350,23 +427,23 @@ create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
    group by test having s > 0 order by s desc;

 create table query_time engine Memory as select *
-    from file('client-times.tsv', TSV, 'test text, query text, client float, server float');
+    from file('analyze/client-times.tsv', TSV,
+        'test text, query_index int, client float, server float');

 create table wall_clock engine Memory as select *
    from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');

 create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
-    select client, server, floor(client/server, 3) p, query
-    from query_time where p > 1.02 order by p desc;
+    select client, server, floor(client/server, 3) p, query_display_name
+    from query_time left join query_display_names using (test, query_index)
+    where p > 1.02 order by p desc;

 create table test_time engine Memory as
    select test, sum(client) total_client_time,
        maxIf(client, not short) query_max,
        minIf(client, not short) query_min,
-        count(*) queries,
-        sum(short) short_queries
-    from query_time full join queries
-    using test, query
+        count(*) queries, sum(short) short_queries
+    from query_time full join queries using (test, query_index)
    group by test;

 create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
@ -378,144 +455,207 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
        floor(real / queries, 3) avg_real_per_query,
        floor(query_min, 3)
    from test_time
-        -- wall clock times are also measured for skipped tests, so don't
-        -- do full join
-        left join wall_clock using test
+    -- wall clock times are also measured for skipped tests, so don't
+    -- do full join
+    left join wall_clock using test
    order by avg_real_per_query desc;

+-- report for all queries page, only main metric
 create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
    select changed_fail, unstable_fail,
        left, right, diff,
        floor(left > right ? left / right : right / left, 3),
-        stat_threshold, test, query
-    from queries order by test, query;
+        stat_threshold, test, query_display_name
+    from queries order by test, query_display_name;
+
+-- new report for all queries with all metrics (no page yet)
+create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
+    select metric_name, left, right, diff,
+        floor(left > right ? left / right : right / left, 3),
+        stat_threshold, test, query_index, query_display_name
+    from query_metric_stats
+    order by test, query_index;
 " 2> >(tee -a report/errors.log 1>&2)

-for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
-do
-    # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an
-    # absolutely atrocious way. This should be done by the file() function itself.
-    paste -d' ' \
-        <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
-        <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
-        | tr '\n' ', ' | sed 's/,$//' > "$x.columns"
-done

+# Prepare source data for metrics and flamegraphs for unstable queries.
 for version in {right,left}
 do
-clickhouse-local --query "
+    rm -rf data
+    clickhouse-local --query "
 create view queries_for_flamegraph as
    select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
-        'query text, test text');
+        'test text, query_index int');
+
+create view query_runs as
+    with 0 as left, 1 as right
+    select * from file('analyze/query-runs.tsv', TSV,
+        'test text, query_index int, query_id text, version UInt8, time float')
+    where version = $version
+    ;
+
+create view query_display_names as select * from
+    file('analyze/query-display-names.tsv', TSV,
+        'test text, query_index int, query_display_name text')
+    ;
+
+create table unstable_query_runs engine File(TSVWithNamesAndTypes,
+        'unstable-query-runs.$version.rep') as
+    select test, query_index, query_display_name, query_id
+    from query_runs
+    join queries_for_flamegraph on
+        query_runs.test = queries_for_flamegraph.test
+        and query_runs.query_index = queries_for_flamegraph.query_index
+    left join query_display_names on
+        query_runs.test = query_display_names.test
+        and query_runs.query_index = query_display_names.query_index
+    ;

 create view query_log as select *
    from file('$version-query-log.tsv', TSVWithNamesAndTypes,
        '$(cat "$version-query-log.tsv.columns")');

+create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
+        'unstable-run-metrics.$version.rep') as
+    select
+        test, query_index, query_id,
+        ProfileEvents.Values value, ProfileEvents.Names metric
+    from query_log array join ProfileEvents
+    join unstable_query_runs using (query_id)
+    ;
+
+create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
+        'unstable-run-metrics-2.$version.rep') as
+    select
+        test, query_index, query_id,
+        v, n
+    from (
+        select
+            test, query_index, query_id,
+            ['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
+            [memory_usage, read_bytes, written_bytes, query_duration_ms] v
+        from query_log
+        join unstable_query_runs using (query_id)
+    )
+    array join v, n;
+
 create view trace_log as select *
    from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
        '$(cat "$version-trace-log.tsv.columns")');

-create view addresses_src as select *
+create view addresses_src as select addr,
+        -- Some functions change name between builds, e.g. '__clone' or 'clone' or
+        -- even '__GI__clone@@GLIBC_2.32'. This breaks differential flame graphs, so
+        -- filter them out here.
+        [name, 'clone.S (filtered by script)', 'pthread_cond_timedwait (filtered by script)']
+            -- this line is a subscript operator of the above array
+            [1 + multiSearchFirstIndex(name, ['clone.S', 'pthread_cond_timedwait'])] name
    from file('$version-addresses.tsv', TSVWithNamesAndTypes,
        '$(cat "$version-addresses.tsv.columns")');

 create table addresses_join_$version engine Join(any, left, address) as
    select addr address, name from addresses_src;

-create table unstable_query_runs engine File(TSVWithNamesAndTypes,
-        'unstable-query-runs.$version.rep') as
-    select query, query_id from query_log
-    where query in (select query from queries_for_flamegraph)
-        and query_id not like 'prewarm %'
-    ;
-
-create table unstable_query_log engine File(Vertical,
-        'unstable-query-log.$version.rep') as
-    select * from query_log
-    where query_id in (select query_id from unstable_query_runs);
-
-create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
-        'unstable-run-metrics.$version.rep') as
-    select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
-    from query_log array join ProfileEvents
-    where query_id in (select query_id from unstable_query_runs)
-    ;
-
-create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
-        'unstable-run-metrics-2.$version.rep') as
-    select v, n, query_id, query
-    from
-        (select
-            ['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
-            [memory_usage, read_bytes, written_bytes, query_duration_ms] v,
-            query,
-            query_id
-        from query_log
-        where query_id in (select query_id from unstable_query_runs))
-    array join n, v;
-
 create table unstable_run_traces engine File(TSVWithNamesAndTypes,
        'unstable-run-traces.$version.rep') as
    select
+        test, query_index, query_id,
        count() value,
-        joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric,
-        unstable_query_runs.query_id,
-        any(unstable_query_runs.query) query
-    from unstable_query_runs
-    join trace_log on trace_log.query_id = unstable_query_runs.query_id
-    group by unstable_query_runs.query_id, metric
+        joinGet(addresses_join_$version, 'name', arrayJoin(trace))
+            || '(' || toString(trace_type) || ')' metric
+    from trace_log
+    join unstable_query_runs using query_id
+    group by test, query_index, query_id, metric
    order by count() desc
    ;

 create table metric_devation engine File(TSVWithNamesAndTypes,
-        'metric-deviation.$version.rep') as
-    select query, floor((q[3] - q[1])/q[2], 3) d,
-        quantilesExact(0, 0.5, 1)(value) q, metric
-    from (select * from unstable_run_metrics
-        union all select * from unstable_run_traces
-        union all select * from unstable_run_metrics_2) mm
-    join queries_for_flamegraph using query
-    group by query, metric
-    having d > 0.5
-    order by query desc, d desc
+        'report/metric-deviation.$version.tsv') as
+    -- first goes the key used to split the file with grep
+    select test, query_index, query_display_name,
+        d, q, metric
+    from (
+        select
+            test, query_index,
+            floor((q[3] - q[1])/q[2], 3) d,
+            quantilesExact(0, 0.5, 1)(value) q, metric
+        from (select * from unstable_run_metrics
+            union all select * from unstable_run_traces
+            union all select * from unstable_run_metrics_2) mm
+        group by test, query_index, metric
+        having d > 0.5
+    ) metrics
+    left join query_display_names using (test, query_index)
+    order by test, query_index, d desc
    ;

-create table stacks engine File(TSV, 'stacks.$version.rep') as
+create table stacks engine File(TSV, 'report/stacks.$version.tsv') as
    select
-        query,
+        -- first goes the key used to split the file with grep
+        test, query_index, trace_type, any(query_display_name),
+        -- next go the stacks in flamegraph format: 'func1;...;funcN count'
        arrayStringConcat(
-            arrayMap(x -> joinGet(addresses_join_$version, 'name', x),
+            arrayMap(
+                addr -> joinGet(addresses_join_$version, 'name', addr),
                arrayReverse(trace)
            ),
            ';'
        ) readable_trace,
-        count()
+        count() c
    from trace_log
    join unstable_query_runs using query_id
-    group by query, trace
+    group by test, query_index, trace_type, trace
+    order by test, query_index, trace_type, trace
    ;
 " 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
 done
 wait

+# Create per-query flamegraphs
 IFS=$'\n'
 for version in {right,left}
 do
-    for query in $(cut -d'	' -f1 "stacks.$version.rep" | sort | uniq)
+    for query in $(cut -d'	' -f1-4 "report/stacks.$version.tsv" | sort | uniq)
    do
-        query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
+        query_file=$(echo "$query" | cut -c-120 | sed 's/[/	]/_/g')
+        echo "$query_file" >> report/query-files.txt

        # Build separate .svg flamegraph for each query.
-        grep -F "$query	" "stacks.$version.rep" \
-            | cut -d'	' -f 2- \
+        # -F is somewhat unsafe because it might match not the beginning of the
+        # string, but this is unlikely and escaping the query for grep is a pain.
+        grep -F "$query	" "report/stacks.$version.tsv" \
+            | cut -f 5- \
            | sed 's/\t/ /g' \
-            | tee "$query_file.stacks.$version.rep" \
-            | ~/fg/flamegraph.pl > "$query_file.$version.svg" &
+            | tee "report/tmp/$query_file.stacks.$version.tsv" \
+            | ~/fg/flamegraph.pl --hash > "$query_file.$version.svg" &
+    done
+done
+wait
+unset IFS

-        # Copy metric stats into separate files as well.
-        grep -F "$query	" "metric-deviation.$version.rep" \
-            | cut -f2- > "$query_file.$version.metrics.rep" &
+# Create differential flamegraphs.
+# report/query-files.txt gets one line per query per version from the loop
+# above, so a query that has stacks for both versions is listed twice.
+# Deduplicate the list, otherwise two background jobs would write the same
+# .diff.tsv and .diff.svg files at the same time.
+IFS=$'\n'
+for query_file in $(sort report/query-files.txt | uniq)
+do
+    ~/fg/difffolded.pl "report/tmp/$query_file.stacks.left.tsv" \
+            "report/tmp/$query_file.stacks.right.tsv" \
+        | tee "report/tmp/$query_file.stacks.diff.tsv" \
+        | ~/fg/flamegraph.pl > "$query_file.diff.svg" &
+done
+unset IFS
+wait
+
+# Create per-query files with metrics. Note that the key is different from flamegraphs.
+IFS=$'\n'
+for version in {right,left}
+do
+    for query in $(cut -d'	' -f1-3 "report/metric-deviation.$version.tsv" | sort | uniq)
+    do
+        query_file=$(echo "$query" | cut -c-120 | sed 's/[/	]/_/g')
+
+        # Ditto the above comment about -F.
+        grep -F "$query	" "report/metric-deviation.$version.tsv" \
+            | cut -f4- > "$query_file.$version.metrics.rep" &
    done
 done
 wait
--- a/docker/test/performance-comparison/download.sh
+++ b/docker/test/performance-comparison/download.sh
@ -46,7 +46,13 @@ function download
    done

    mkdir ~/fg ||:
-    cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl &
+    (
+        cd ~/fg
+        wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl"
+        wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/difffolded.pl"
+        chmod +x ~/fg/difffolded.pl
+        chmod +x ~/fg/flamegraph.pl
+    ) &

    wait
 }
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@ -81,8 +81,13 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
    fi
 ) | tee right-commit.txt

-# Prepare the list of changed tests for use by compare.sh
-git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST"~ master)" -- tests/performance | tee changed-tests.txt
+if [ "$PR_TO_TEST" != "0" ]
+then
+    # Prepare the list of tests changed in the PR for use by compare.sh. Compare to
+    # merge base, because master might be far in the future and have unrelated test
+    # changes.
+    git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST" master)" -- tests/performance | tee changed-tests.txt
+fi

 # Set python output encoding so that we can print queries with Russian letters.
 export PYTHONIOENCODING=utf-8
@ -119,5 +124,5 @@ done

 dmesg -T > dmesg.log

-7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} ./report
+7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
 cp compare.log /output
--- a/docker/test/performance-comparison/eqmed.sql
+++ b/docker/test/performance-comparison/eqmed.sql
@ -1,32 +1,37 @@
-- input is table(query text, run UInt32, version int, time float)
+-- input is table(test text, query text, run UInt32, version int, metrics Array(float))
 select
-   floor(original_medians_array.time_by_version[1], 4) l,
-   floor(original_medians_array.time_by_version[2], 4) r,
-   floor((r - l) / l, 3) diff_percent,
-   floor(threshold / l, 3) threshold_percent,
-   query
+   arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded,
+   arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded,
+   arrayMap(x, y -> floor((y - x) / x, 3), l, r) diff_percent,
+   arrayMap(x, y -> floor(x / y, 3), threshold, l) threshold_percent,
+   test, query
 from
   (
      -- quantiles of randomization distributions
-      select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
+      select quantileExactForEach(0.999)(
+        arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
+      ) threshold
      ---- uncomment to see what the distribution is really like
-      --, uniqExact(d) u
+      --, uniqExact(d.1) u
      --, arraySort(x->x.1,
      --      arrayZip(
-      --          (sumMap([d], [1]) as f).1,
+      --          (sumMap([d.1], [1]) as f).1,
      --          f.2)) full_histogram
      from
         (
-            select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
+            -- make array 'random label' -> '[median metric]'
+            select virtual_run, groupArrayInsertAt(median_metrics, random_label) metrics_by_label
            from (
-                  select medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label
+                  -- get [median metric] arrays among virtual runs, grouping by random label
+                  select medianExactForEach(metrics) median_metrics, virtual_run, random_label
                  from (
-                        select *, toUInt32(rowNumberInAllBlocks() % 2) random_label -- randomly relabel measurements
+                        -- randomly relabel measurements
+                        select *, toUInt32(rowNumberInAllBlocks() % 2) random_label
                        from (
-                              select time, number virtual_run 
+                              select metrics, number virtual_run
                              from
                                -- strip the query away before the join -- it might be several kB long;
-                                (select time, run, version from table) no_query,
+                                (select metrics, run, version from table) no_query,
                                -- duplicate input measurements into many virtual runs
                                numbers(1, 100000) nn
                              -- for each virtual run, randomly reorder measurements
@ -40,19 +45,19 @@ from
      -- this select aggregates by virtual_run
   ) rd,
   (
-        select groupArrayInsertAt(median_time, version) time_by_version
+        select groupArrayInsertAt(median_metrics, version) medians_by_version
        from
        (
-            select medianExact(time) median_time, version
+            select medianExactForEach(metrics) median_metrics, version
            from table
            group by version
        ) original_medians
   ) original_medians_array,
   (
-        select any(query) query from table
+        select any(test) test, any(query) query from table
   ) any_query,
   (
-       select throwIf(uniq(query) != 1) from table
+       select throwIf(uniq((test, query)) != 1) from table
   ) check_single_query -- this subselect checks that there is only one query in the input table;
                        -- written this way so that it is not optimized away (#10523)
 ;
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@ -11,6 +11,9 @@ import string
 import time
 import traceback

+# Escape a string so it can be printed as a single TSV field: backslashes are
+# doubled, tabs and newlines become the two-character sequences \t and \n,
+# and carriage returns are dropped.
+def tsv_escape(s):
+    return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
+
 stage_start_seconds = time.perf_counter()

 def report_stage_end(stage_name):
@ -29,6 +32,8 @@ parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS',
 parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.')
 args = parser.parse_args()

+test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
+
 tree = et.parse(args.file[0])
 root = tree.getroot()

@ -110,8 +115,9 @@ for t in tables:
        try:
            res = c.execute("select 1 from {} limit 1".format(t))
        except:
-            print('skipped\t' + traceback.format_exception_only(*sys.exc_info()[:2])[-1])
-            traceback.print_exc()
+            exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
+            skipped_message = ' '.join(exception_message.split('\n')[:2])
+            print(f'skipped\t{tsv_escape(skipped_message)}')
            sys.exit(0)

 report_stage_end('preconditions')
@ -133,27 +139,30 @@ for c in connections:
 report_stage_end('fill')

 # Run test queries
-def tsv_escape(s):
-    return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
-
 test_query_templates = [q.text for q in root.findall('query')]
 test_queries = substitute_parameters(test_query_templates)

 report_stage_end('substitute2')

-for i, q in enumerate(test_queries):
+for query_index, q in enumerate(test_queries):
+    query_prefix = f'{test_name}.query{query_index}'
+
    # We have some crazy long queries (about 100kB), so trim them to a sane
-    # length.
+    # length. This means we can't use query text as an identifier and have to
+    # use the test name + the test-wide query index.
    query_display_name = q
    if len(query_display_name) > 1000:
-        query_display_name = f'{query_display_name[:1000]}...({i})'
+        query_display_name = f'{query_display_name[:1000]}...({query_index})'
+
+    print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')

    # Prewarm: run once on both servers. Helps to bring the data into memory,
    # precompile the queries, etc.
    try:
        for conn_index, c in enumerate(connections):
-            res = c.execute(q, query_id = f'prewarm {0} {query_display_name}')
-            print(f'prewarm\t{tsv_escape(query_display_name)}\t{conn_index}\t{c.last_query.elapsed}')
+            prewarm_id = f'{query_prefix}.prewarm0'
+            res = c.execute(q, query_id = prewarm_id)
+            print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
    except KeyboardInterrupt:
        raise
    except:
@ -172,13 +181,14 @@ for i, q in enumerate(test_queries):
    start_seconds = time.perf_counter()
    server_seconds = 0
    for run in range(0, args.runs):
+        run_id = f'{query_prefix}.run{run}'
        for conn_index, c in enumerate(connections):
-            res = c.execute(q)
-            print(f'query\t{tsv_escape(query_display_name)}\t{run}\t{conn_index}\t{c.last_query.elapsed}')
+            res = c.execute(q, query_id = run_id)
+            print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
            server_seconds += c.last_query.elapsed

    client_seconds = time.perf_counter() - start_seconds
-    print(f'client-time\t{tsv_escape(query_display_name)}\t{client_seconds}\t{server_seconds}')
+    print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')

 report_stage_end('benchmark')

--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@ -25,6 +25,9 @@ very_unstable_queries = 0
 # max seconds to run one query by itself, not counting preparation
 allowed_single_run_time = 2

+color_bad='#ffb0c0'
+color_good='#b0d050'
+
 header_template = """
 <!DOCTYPE html>
 <html>
@ -179,6 +182,16 @@ if args.report == 'main':

    print_tested_commits()

+    run_error_rows = tsvRows('run-errors.tsv')
+    error_tests += len(run_error_rows)
+    printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
+
+    slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
+    error_tests += len(slow_on_client_rows)
+    printSimpleTable('Slow on client',
+                     ['Client time, s', 'Server time, s', 'Ratio', 'Query'],
+                     slow_on_client_rows)
+
    def print_changes():
        rows = tsvRows('report/changed-perf.tsv')
        if not rows:
@ -188,8 +201,8 @@ if args.report == 'main':

        print(tableStart('Changes in performance'))
        columns = [
-            'Old, s.',                                         # 0
-            'New, s.',                                         # 1
+            'Old, s',                                          # 0
+            'New, s',                                          # 1
            'Relative difference (new&nbsp;&minus;&nbsp;old) / old',   # 2
            'p&nbsp;<&nbsp;0.001 threshold',                   # 3
            # Failed                                           # 4
@ -205,10 +218,10 @@ if args.report == 'main':
            if int(row[4]):
                if float(row[2]) < 0.:
                    faster_queries += 1
-                    attrs[2] = 'style="background: #00ff00"'
+                    attrs[2] = f'style="background: {color_good}"'
                else:
                    slower_queries += 1
-                    attrs[2] = 'style="background: #ff0000"'
+                    attrs[2] = f'style="background: {color_bad}"'
            else:
                attrs[2] = ''

@ -218,12 +231,6 @@ if args.report == 'main':

    print_changes()

-    slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
-    error_tests += len(slow_on_client_rows)
-    printSimpleTable('Slow on client',
-        ['Client time, s.', 'Server time, s.', 'Ratio', 'Query'],
-        slow_on_client_rows)
-
    def print_unstable_queries():
        global unstable_queries
        global very_unstable_queries
@ -252,7 +259,7 @@ if args.report == 'main':
        for r in unstable_rows:
            if int(r[4]):
                very_unstable_queries += 1
-                attrs[3] = 'style="background: #ffb0a0"'
+                attrs[3] = f'style="background: {color_bad}"'
            else:
                attrs[3] = ''

@ -262,11 +269,7 @@ if args.report == 'main':

    print_unstable_queries()

-    run_error_rows = tsvRows('run-errors.tsv')
-    error_tests += len(run_error_rows)
-    printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
-
-    skipped_tests_rows = tsvRows('skipped-tests.tsv')
+    skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
    printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)

    printSimpleTable('Tests with most unstable queries',
@ -281,13 +284,13 @@ if args.report == 'main':

        columns = [
            'Test',                                          #0
-            'Wall clock time, s.',                           #1
-            'Total client time, s.',                         #2
+            'Wall clock time, s',                            #1
+            'Total client time, s',                          #2
            'Total queries',                                 #3
            'Ignored short queries',                         #4
-            'Longest query<br>(sum for all runs), s.',       #5
-            'Avg wall clock time<br>(sum for all runs), s.', #6
-            'Shortest query<br>(sum for all runs), s.',      #7
+            'Longest query<br>(sum for all runs), s',        #5
+            'Avg wall clock time<br>(sum for all runs), s',  #6
+            'Shortest query<br>(sum for all runs), s',       #7
            ]

        print(tableStart('Test times'))
@ -300,13 +303,13 @@ if args.report == 'main':
            if float(r[6]) > 1.5 * total_runs:
                # FIXME should be 15s max -- investigate parallel_insert
                slow_average_tests += 1
-                attrs[6] = 'style="background: #ffb0a0"'
+                attrs[6] = f'style="background: {color_bad}"'
            else:
                attrs[6] = ''

            if float(r[5]) > allowed_single_run_time * total_runs:
                slow_average_tests += 1
-                attrs[5] = 'style="background: #ffb0a0"'
+                attrs[5] = f'style="background: {color_bad}"'
            else:
                attrs[5] = ''

@ -320,9 +323,9 @@ if args.report == 'main':

    print("""
    <p class="links">
-    <a href="output.7z">Test output</a>
    <a href="all-queries.html">All queries</a>
    <a href="compare.log">Log</a>
+    <a href="output.7z">Test output</a>
    </p>
    </body>
    </html>
@ -382,8 +385,8 @@ elif args.report == 'all-queries':
        columns = [
            # Changed #0
            # Unstable #1
-            'Old, s.', #2
-            'New, s.', #3
+            'Old, s', #2
+            'New, s', #3
            'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #4
            'Times speedup / slowdown',                 #5
            'p&nbsp;<&nbsp;0.001 threshold',          #6
@ -399,21 +402,21 @@ elif args.report == 'all-queries':
        attrs[1] = None
        for r in rows:
            if int(r[1]):
-                attrs[6] = 'style="background: #ffb0a0"'
+                attrs[6] = f'style="background: {color_bad}"'
            else:
                attrs[6] = ''

            if int(r[0]):
                if float(r[4]) > 0.:
-                    attrs[4] = 'style="background: #ffb0a0"'
+                    attrs[4] = f'style="background: {color_bad}"'
                else:
-                    attrs[4] = 'style="background: #adbdff"'
+                    attrs[4] = f'style="background: {color_good}"'
            else:
                attrs[4] = ''

            if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
-                attrs[2] = 'style="background: #ffb0a0"'
-                attrs[3] = 'style="background: #ffb0a0"'
+                attrs[2] = f'style="background: {color_bad}"'
+                attrs[3] = f'style="background: {color_bad}"'
            else:
                attrs[2] = ''
                attrs[3] = ''
@ -428,9 +431,9 @@ elif args.report == 'all-queries':

    print("""
    <p class="links">
-    <a href="output.7z">Test output</a>
    <a href="report.html">Main report</a>
    <a href="compare.log">Log</a>
+    <a href="output.7z">Test output</a>
    </p>
    </body>
    </html>
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@ -7,7 +7,7 @@ Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.

 # If You Use Windows {#if-you-use-windows}

-If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
+If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.

 # If You Use a 32-bit System {#if-you-use-a-32-bit-system}

--- a/docs/en/engines/index.md
+++ b/docs/en/engines/index.md
@ -1,6 +1,8 @@
 ---
 toc_folder_title: Engines
 toc_priority: 25
+toc_title: hidden
+toc_hidden: true
 ---

 {## [Original article](https://clickhouse.tech/docs/en/engines/) ##}
--- a/docs/en/engines/table-engines/integrations/kafka.md
+++ b/docs/en/engines/table-engines/integrations/kafka.md
@ -72,7 +72,7 @@ Examples:
                            kafka_format = 'JSONEachRow',
                            kafka_num_consumers = 4;

-  CREATE TABLE queue2 (
+  CREATE TABLE queue3 (
    timestamp UInt64,
    level String,
    message String
--- a/docs/en/faq/index.md
+++ b/docs/en/faq/index.md
@ -1,6 +1,9 @@
 ---
 toc_folder_title: F.A.Q.
 toc_priority: 76
+toc_title: hidden
+toc_hidden: true
 ---


+{## [Original article](https://clickhouse.tech/docs/en/faq) ##}
--- a/docs/en/sql-reference/aggregate-functions/reference.md
+++ b/docs/en/sql-reference/aggregate-functions/reference.md
@ -1543,20 +1543,32 @@ It represents an unbiased estimate of the variance of a random variable if passe

 Returns `Float64`. When `n <= 1`, returns `+∞`.

+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
+
 ## varPop(x) {#varpopx}

 Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.

 In other words, dispersion for a set of values. Returns `Float64`.

+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
+
 ## stddevSamp(x) {#stddevsampx}

 The result is equal to the square root of `varSamp(x)`.

+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
+
 ## stddevPop(x) {#stddevpopx}

 The result is equal to the square root of `varPop(x)`.

+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
+
 ## topK(N)(x) {#topknx}

 Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves).
@ -1641,14 +1653,23 @@ Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.

 Returns Float64. When `n <= 1`, returns +∞.

+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
+
 ## covarPop(x, y) {#covarpopx-y}

 Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.

+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error.
+
 ## corr(x, y) {#corrx-y}

 Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.

+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
+
 ## categoricalInformationValue {#categoricalinformationvalue}

 Calculates the value of `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` for each category.
--- a/docs/en/sql-reference/functions/random-functions.md
+++ b/docs/en/sql-reference/functions/random-functions.md
@ -11,7 +11,7 @@ All the functions accept zero arguments or one argument.
 If an argument is passed, it can be any type, and its value is not used for anything.
 The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers.

-## rand {#rand}
+## rand, rand32 {#rand}

 Returns a pseudo-random UInt32 number, evenly distributed among all UInt32-type numbers.
 Uses a linear congruential generator.
--- a/docs/en/sql-reference/syntax.md
+++ b/docs/en/sql-reference/syntax.md
@ -28,9 +28,10 @@ There may be any number of space symbols between syntactical constructions (incl

 ## Comments {#comments}

-ClickHouse supports either SQL-style and C-style comments.
-SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
-C-style are from `/*` to `*/`and can be multiline, spaces are not required either.
+ClickHouse supports both SQL-style and C-style comments:
+
+-   SQL-style comments start with `--` and continue to the end of the line; a space after `--` can be omitted.
+-   C-style comments span from `/*` to `*/` and can be multiline; spaces are not required either.

 ## Keywords {#syntax-keywords}

--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@ -291,30 +291,30 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY
 INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
 ```

-#### Поддержка для функций {#podderzhka-dlia-funktsii}
+#### Поддержка для функций {#functions-support}

 Условия в секции `WHERE` содержат вызовы функций, оперирующих со столбцами. Если столбец - часть индекса, ClickHouse пытается использовать индекс при выполнении функции. Для разных видов индексов, ClickHouse поддерживает различные наборы функций, которые могут использоваться индексами.

 Индекс `set` используется со всеми функциями. Наборы функций для остальных индексов представлены в таблице ниже.

-| Function (operator) / Index                                                                                    | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter |
-|----------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
-| [equals (=, ==)](../../../engines/table-engines/mergetree-family/mergetree.md#function-equals)                 | ✔           | ✔      | ✔           | ✔           | ✔             |
-| [notEquals(!=, \<\>)](../../../engines/table-engines/mergetree-family/mergetree.md#function-notequals)         | ✔           | ✔      | ✔           | ✔           | ✔             |
-| [like](../../../engines/table-engines/mergetree-family/mergetree.md#function-like)                             | ✔           | ✔      | ✔           | ✗           | ✗             |
-| [notLike](../../../engines/table-engines/mergetree-family/mergetree.md#function-notlike)                       | ✔           | ✔      | ✔           | ✔           | ✗             |
-| [startsWith](../../../engines/table-engines/mergetree-family/mergetree.md#startswith)                          | ✔           | ✔      | ✔           | ✔           | ✗             |
-| [endsWith](../../../engines/table-engines/mergetree-family/mergetree.md#endswith)                              | ✗           | ✗      | ✔           | ✔           | ✗             |
-| [multiSearchAny](../../../engines/table-engines/mergetree-family/mergetree.md#function-multisearchany)         | ✗           | ✗      | ✔           | ✔           | ✗             |
-| [in](../../../engines/table-engines/mergetree-family/mergetree.md#in-functions)                                | ✔           | ✔      | ✔           | ✔           | ✔             |
-| [notIn](../../../engines/table-engines/mergetree-family/mergetree.md#in-functions)                             | ✔           | ✔      | ✔           | ✔           | ✔             |
-| [less (\<)](../../../engines/table-engines/mergetree-family/mergetree.md#function-less)                        | ✔           | ✔      | ✗           | ✗           | ✗             |
-| [greater (\>)](../../../engines/table-engines/mergetree-family/mergetree.md#function-greater)                  | ✔           | ✔      | ✗           | ✗           | ✗             |
-| [lessOrEquals (\<=)](../../../engines/table-engines/mergetree-family/mergetree.md#function-lessorequals)       | ✔           | ✔      | ✗           | ✗           | ✗             |
-| [greaterOrEquals (\>=)](../../../engines/table-engines/mergetree-family/mergetree.md#function-greaterorequals) | ✔           | ✔      | ✗           | ✗           | ✗             |
-| [empty](../../../engines/table-engines/mergetree-family/mergetree.md#function-empty)                           | ✔           | ✔      | ✗           | ✗           | ✗             |
-| [notEmpty](../../../engines/table-engines/mergetree-family/mergetree.md#function-notempty)                     | ✔           | ✔      | ✗           | ✗           | ✗             |
-| hasToken                                                                                                       | ✗           | ✗      | ✗           | ✔           | ✗             |
+| Функция (оператор) / Индекс                                                                                | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter |
+|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
+| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals)                 | ✔           | ✔      | ✔           | ✔           | ✔             |
+| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals)         | ✔           | ✔      | ✔           | ✔           | ✔             |
+| [like](../../../sql-reference/functions/string-search-functions.md#function-like)                          | ✔           | ✔      | ✔           | ✗           | ✗             |
+| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike)                    | ✔           | ✔      | ✔           | ✗           | ✗             |
+| [startsWith](../../../sql-reference/functions/string-functions.md#startswith)                              | ✔           | ✔      | ✔           | ✔           | ✗             |
+| [endsWith](../../../sql-reference/functions/string-functions.md#endswith)                                  | ✗           | ✗      | ✔           | ✔           | ✗             |
+| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany)      | ✗           | ✗      | ✔           | ✗           | ✗             |
+| [in](../../../sql-reference/functions/in-functions.md#in-functions)                                        | ✔           | ✔      | ✔           | ✔           | ✔             |
+| [notIn](../../../sql-reference/functions/in-functions.md#in-functions)                                     | ✔           | ✔      | ✔           | ✔           | ✔             |
+| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less)                        | ✔           | ✔      | ✗           | ✗           | ✗             |
+| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater)                  | ✔           | ✔      | ✗           | ✗           | ✗             |
+| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals)       | ✔           | ✔      | ✗           | ✗           | ✗             |
+| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔           | ✔      | ✗           | ✗           | ✗             |
+| [empty](../../../sql-reference/functions/array-functions.md#function-empty)                                | ✔           | ✔      | ✗           | ✗           | ✗             |
+| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty)                          | ✔           | ✔      | ✗           | ✗           | ✗             |
+| hasToken                                                                                                   | ✗           | ✗      | ✗           | ✔           | ✗             |

 Функции с постоянным агрументом, который меньше, чем размер ngram не могут использовать индекс `ngrambf_v1` для оптимизации запроса.

--- a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md
@ -113,7 +113,7 @@ ClickHouse может слить куски данных таким образо

 Если название вложенной таблицы заканчивается на `Map` и она содержит не менее двух столбцов, удовлетворяющих критериям:

-   первый столбец - числовой `(*Int*, Date, DateTime)`, назовем его условно `key`,
+-   первый столбец - числовой `(*Int*, Date, DateTime)` или строковый `(String, FixedString)`, назовем его условно `key`,
 -   остальные столбцы - арифметические `(*Int*, Float32/64)`, условно `(values...)`,

 то вложенная таблица воспринимается как отображение `key => (values...)` и при слиянии её строк выполняется слияние элементов двух множеств по `key` со сложением соответствующих `(values...)`.
--- a/docs/ru/interfaces/third-party/client-libraries.md
+++ b/docs/ru/interfaces/third-party/client-libraries.md
@ -45,6 +45,7 @@
    -   [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
 -   Elixir
    -   [clickhousex](https://github.com/appodeal/clickhousex/)
+    -   [pillar](https://github.com/sofakingworld/pillar)
 -   Nim
    -   [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)

--- a/docs/ru/sql-reference/aggregate-functions/reference.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference.md
@ -1533,20 +1533,33 @@ SELECT medianDeterministic(val, 1) FROM t

 Возвращает `Float64`. В случае, когда `n <= 1`, возвращается `+∞`.

+!!! note "Примечание"
+    Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+
 ## varPop(x) {#varpopx}

 Вычисляет величину `Σ((x - x̅)^2) / n`, где `n` - размер выборки, `x̅`- среднее значение `x`.

 То есть, дисперсию для множества значений. Возвращает `Float64`.

+!!! note "Примечание"
+    Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+
 ## stddevSamp(x) {#stddevsampx}

 Результат равен квадратному корню от `varSamp(x)`.

+!!! note "Примечание"
+    Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+
 ## stddevPop(x) {#stddevpopx}

 Результат равен квадратному корню от `varPop(x)`.

+!!! note "Примечание"
+    Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+
+
 ## topK(N)(column) {#topkncolumn}

 Возвращает массив наиболее часто встречающихся значений в указанном столбце. Результирующий массив упорядочен по убыванию частоты значения (не по самим значениям).
@ -1626,14 +1639,24 @@ SELECT topKWeighted(10)(number, number) FROM numbers(1000)

 Возвращает Float64. В случае, когда `n <= 1`, возвращается +∞.

+!!! note "Примечание"
+    Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+
 ## covarPop(x, y) {#covarpopx-y}

 Вычисляет величину `Σ((x - x̅)(y - y̅)) / n`.

+!!! note "Примечание"
+    Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+
+
 ## corr(x, y) {#corrx-y}

 Вычисляет коэффициент корреляции Пирсона: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.

+!!! note "Примечание"
+    Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `corrStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+
 ## simpleLinearRegression {#simplelinearregression}

 Выполняет простую (одномерную) линейную регрессию.
--- a/docs/ru/whats-new/extended-roadmap.md
+++ b/docs/ru/whats-new/extended-roadmap.md
@ -174,7 +174,7 @@ Upd. Всё ещё ждём удаление старого кода, котор

 ### 2.3. Перенос столбцового ser/de из DataType в Column {#perenos-stolbtsovogo-serde-iz-datatype-v-column}

-В очереди.
+В очереди. Антон Попов.

 ### 2.4. Перевод LowCardinality из DataType в Column. Добавление ColumnSparse {#perevod-lowcardinality-iz-datatype-v-column-dobavlenie-columnsparse}

@ -977,10 +977,10 @@ Q2.

 [Виталий Баранов](https://github.com/vitlibar) и Денис Глазачев, Altinity. Требует 12.1.

-### 12.6. Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
+### 12.6. + Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}

 [Виталий Баранов](https://github.com/vitlibar). Требует 12.1.
-Есть pull request. Q2.
+Есть pull request. Q2. Готово.


 ## 13. Разделение ресурсов, multi-tenancy {#razdelenie-resursov-multi-tenancy}
--- a/docs/tools/build.py
+++ b/docs/tools/build.py
@ -58,17 +58,6 @@ def build_for_lang(lang, args):
            'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
            'language': lang,
            'direction': 'rtl' if lang == 'fa' else 'ltr',
-            # TODO: cleanup
-            'feature': {
-                'tabs': False
-            },
-            'palette': {
-                'primary': 'white',
-                'accent': 'white'
-            },
-            'font': False,
-            'logo': 'images/logo.svg',
-            'favicon': 'assets/images/favicon.ico',
            'static_templates': ['404.html'],
            'extra': {
                'now': int(time.mktime(datetime.datetime.now().timetuple()))  # TODO better way to avoid caching
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@ -18,10 +18,10 @@ Markdown==3.2.1
 MarkupSafe==1.1.1
 mkdocs==1.1.2
 mkdocs-htmlproofer-plugin==0.0.3
-mkdocs-macros-plugin==0.4.7
+mkdocs-macros-plugin==0.4.9
 nltk==3.5
 nose==1.3.7
-protobuf==3.12.0
+protobuf==3.12.1
 numpy==1.18.4
 Pygments==2.5.2
 pymdown-extensions==7.1
@ -30,7 +30,7 @@ PyYAML==5.3.1
 repackage==0.7.3
 requests==2.23.0
 singledispatch==3.4.0.3
-six==1.14.0
+six==1.15.0
 soupsieve==2.0.1
 termcolor==1.1.0
 tornado==5.1.1
--- a/docs/zh/getting-started/install.md
+++ b/docs/zh/getting-started/install.md
@ -46,7 +46,7 @@ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/
 sudo yum install clickhouse-server clickhouse-client
 ```

-您也可以从此处手动下载和安装软件包：https://repo.yandex.ru/clickhouse/rpm/stable/x86\_64。
+您也可以从此处手动下载和安装软件包：https://repo.yandex.ru/clickhouse/rpm/stable/x86_64。

 ### 来自Docker {#from-docker-image}

--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -1,3 +1,7 @@
+if (USE_CLANG_TIDY)
+    set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
+endif ()
+
 # 'clickhouse' binary is a multi purpose tool,
 # that contain multiple execution modes (client, server, etc.)
 # each of them is built and linked as a separate library, defined below.
@ -201,3 +205,9 @@ endif ()
 if (TARGET clickhouse-server AND TARGET copy-headers)
    add_dependencies(clickhouse-server copy-headers)
 endif ()
+
+if (ENABLE_TESTS AND USE_GTEST)
+    set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor expression_analyzer)
+    add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS})
+    add_dependencies(clickhouse-bundle clickhouse-tests)
+endif()
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@ -289,7 +289,7 @@ private:
                    connection_entries.emplace_back(std::make_shared<Entry>(
                            connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings))));

-                pool.scheduleOrThrowOnError(std::bind(&Benchmark::thread, this, connection_entries));
+                pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); });
            }
        }
        catch (...)
@ -424,7 +424,7 @@ private:
            std::cerr << percent << "%\t\t";
            for (const auto & info : infos)
            {
-                std::cerr << info->sampler.quantileNearest(percent / 100.0) << " sec." << "\t";
+                std::cerr << info->sampler.quantileNearest(percent / 100.0) << " sec.\t";
            }
            std::cerr << "\n";
        };
@ -459,7 +459,7 @@ private:

        auto print_percentile = [&json_out](Stats & info, auto percent, bool with_comma = true)
        {
-            json_out << "\"" << percent << "\"" << ": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n");
+            json_out << "\"" << percent << "\": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n");
        };

        json_out << "{\n";
@ -469,7 +469,7 @@ private:
            const auto & info = infos[i];

            json_out << double_quote << connections[i]->getDescription() << ": {\n";
-            json_out << double_quote << "statistics" << ": {\n";
+            json_out << double_quote << "statistics: {\n";

            print_key_value("QPS", info->queries / info->work_time);
            print_key_value("RPS", info->read_rows / info->work_time);
@ -479,7 +479,7 @@ private:
            print_key_value("num_queries", info->queries.load(), false);

            json_out << "},\n";
-            json_out << double_quote << "query_time_percentiles" << ": {\n";
+            json_out << double_quote << "query_time_percentiles: {\n";

            for (int percent = 0; percent <= 90; percent += 10)
                print_percentile(*info, percent);
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -485,7 +485,7 @@ private:
                history_file = config().getString("history_file");
            else
            {
-                auto history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
+                auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
                if (history_file_from_env)
                    history_file = history_file_from_env;
                else if (!home_path.empty())
@ -1480,7 +1480,7 @@ private:
            "\033[1m↗\033[0m",
        };

-        auto indicator = indicators[increment % 8];
+        const char * indicator = indicators[increment % 8];

        if (!send_logs && written_progress_chars)
            message << '\r';
--- a/programs/client/ConnectionParameters.cpp
+++ b/programs/client/ConnectionParameters.cpp
@ -51,7 +51,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
    {
        std::string prompt{"Password for user (" + user + "): "};
        char buf[1000] = {};
-        if (auto result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
+        if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
            password = result;
    }

--- a/programs/client/Suggest.h
+++ b/programs/client/Suggest.h
@ -5,6 +5,7 @@
 #include <Client/Connection.h>
 #include <IO/ConnectionTimeouts.h>
 #include <common/LineReader.h>
+#include <thread>


 namespace DB
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@ -26,7 +26,7 @@ void ClusterCopier::init()
        if (response.error != Coordination::ZOK)
            return;
        UInt64 version = ++task_description_version;
-        LOG_DEBUG(log, "Task description should be updated, local version " << version);
+        LOG_DEBUG(log, "Task description should be updated, local version {}", version);
    };

    task_description_path = task_zookeeper_path + "/description";
@ -47,7 +47,7 @@ void ClusterCopier::init()
        task_table.initShards(task_cluster->random_engine);
    }

-    LOG_DEBUG(log, "Will process " << task_cluster->table_tasks.size() << " table tasks");
+    LOG_DEBUG(log, "Will process {} table tasks", task_cluster->table_tasks.size());

    /// Do not initialize tables, will make deferred initialization in process()

@ -85,7 +85,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
 {
    TaskTable & task_table = task_shard->task_table;

-    LOG_INFO(log, "Discover partitions of shard " << task_shard->getDescription());
+    LOG_INFO(log, "Discover partitions of shard {}", task_shard->getDescription());

    auto get_partitions = [&] () { return getShardPartitions(timeouts, *task_shard); };
    auto existing_partitions_names = retry(get_partitions, 60);
@ -132,8 +132,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
        {
            if (!task_table.enabled_partitions_set.count(partition_name))
            {
-                LOG_DEBUG(log, "Partition " << partition_name << " will not be processed, since it is not in "
-                                            << "enabled_partitions of " << task_table.table_id);
+                LOG_DEBUG(log, "Partition {} will not be processed, since it is not in enabled_partitions of {}", partition_name, task_table.table_id);
            }
        }
    }
@ -165,11 +164,10 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
        for (const String & missing_partition : missing_partitions)
            ss << " " << missing_partition;

-        LOG_WARNING(log, "There are no " << missing_partitions.size() << " partitions from enabled_partitions in shard "
-                         << task_shard->getDescription() << " :" << ss.str());
+        LOG_WARNING(log, "There are no {} partitions from enabled_partitions in shard {} :{}", missing_partitions.size(), task_shard->getDescription(), ss.str());
    }

-    LOG_DEBUG(log, "Will copy " << task_shard->partition_tasks.size() << " partitions from shard " << task_shard->getDescription());
+    LOG_DEBUG(log, "Will copy {} partitions from shard {}", task_shard->partition_tasks.size(), task_shard->getDescription());
 }

 void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads)
@ -181,7 +179,7 @@ void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts,
        for (const TaskShardPtr & task_shard : task_table.all_shards)
            thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]() { discoverShardPartitions(timeouts, task_shard); });

-        LOG_DEBUG(log, "Waiting for " << thread_pool.active() << " setup jobs");
+        LOG_DEBUG(log, "Waiting for {} setup jobs", thread_pool.active());
        thread_pool.wait();
    }
 }
@ -205,7 +203,8 @@ void ClusterCopier::uploadTaskDescription(const std::string & task_path, const s
    if (code && force)
        zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent);

-    LOG_DEBUG(log, "Task description " << ((code && !force) ? "not " : "") << "uploaded to " << local_task_description_path << " with result " << code << " ("<< zookeeper->error2string(code) << ")");
+    LOG_DEBUG(log, "Task description {} uploaded to {} with result {} ({})",
+        ((code && !force) ? "not " : ""), local_task_description_path, code, zookeeper->error2string(code));
 }

 void ClusterCopier::reloadTaskDescription()
@ -221,7 +220,7 @@ void ClusterCopier::reloadTaskDescription()
    if (code)
        throw Exception("Can't get description node " + task_description_path, ErrorCodes::BAD_ARGUMENTS);

-    LOG_DEBUG(log, "Loading description, zxid=" << task_description_current_stat.czxid);
+    LOG_DEBUG(log, "Loading description, zxid={}", task_description_current_stat.czxid);
    auto config = getConfigurationFromXMLString(task_config_str);

    /// Setup settings
@ -251,9 +250,7 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts)
 {
    for (TaskTable & task_table : task_cluster->table_tasks)
    {
-        LOG_INFO(log, "Process table task " << task_table.table_id << " with "
-                                            << task_table.all_shards.size() << " shards, "
-                                            << task_table.local_shards.size() << " of them are local ones");
+        LOG_INFO(log, "Process table task {} with {} shards, {} of them are local ones", task_table.table_id, task_table.all_shards.size(), task_table.local_shards.size());

        if (task_table.all_shards.empty())
            continue;
@ -357,8 +354,7 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee

        if (static_cast<UInt64>(stat.numChildren) >= task_cluster->max_workers)
        {
-            LOG_DEBUG(log, "Too many workers (" << stat.numChildren << ", maximum " << task_cluster->max_workers << ")"
-                << ". Postpone processing " << description);
+            LOG_DEBUG(log, "Too many workers ({}, maximum {}). Postpone processing {}", stat.numChildren, task_cluster->max_workers, description);

            if (unprioritized)
                current_sleep_time = std::min(max_sleep_time, current_sleep_time + default_sleep_time);
@ -419,7 +415,7 @@ bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_tabl
    {
        bool piece_is_done = checkPartitionPieceIsDone(task_table, partition_name, piece_number, shards_with_partition);
        if (!piece_is_done)
-            LOG_DEBUG(log, "Partition " << partition_name << " piece " + toString(piece_number) + " is not already done.");
+            LOG_DEBUG(log, "Partition {} piece {} is not already done.", partition_name, piece_number);
        answer &= piece_is_done;
    }

@ -435,14 +431,13 @@ bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_tabl
 bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name,
                               size_t piece_number, const TasksShard & shards_with_partition)
 {
-    LOG_DEBUG(log, "Check that all shards processed partition " << partition_name
-                   << " piece " + toString(piece_number) + " successfully");
+    LOG_DEBUG(log, "Check that all shards processed partition {} piece {} successfully", partition_name, piece_number);

    auto zookeeper = context.getZooKeeper();

    /// Collect all shards that contain partition piece number piece_number.
    Strings piece_status_paths;
-    for (auto & shard : shards_with_partition)
+    for (const auto & shard : shards_with_partition)
    {
        ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second;
        ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number];
@ -465,8 +460,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
            TaskStateWithOwner status = TaskStateWithOwner::fromString(res.data);
            if (status.state != TaskState::Finished)
            {
-                LOG_INFO(log, "The task " << res.data << " is being rewritten by "
-                              << status.owner << ". Partition piece will be rechecked");
+                LOG_INFO(log, "The task {} is being rewritten by {}. Partition piece will be rechecked", res.data, status.owner);
                return false;
            }

@ -484,7 +478,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons

        if (!is_clean)
        {
-            LOG_INFO(log, "Partition " << partition_name << " become dirty");
+            LOG_INFO(log, "Partition {} become dirty", partition_name);
            return false;
        }

@ -501,8 +495,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
    }
    catch (const Coordination::Exception & e)
    {
-        LOG_INFO(log, "A ZooKeeper error occurred while checking partition " << partition_name << " piece number "
-                       << toString(piece_number) << ". Will recheck the partition. Error: " << e.displayText());
+        LOG_INFO(log, "A ZooKeeper error occurred while checking partition {} piece number {}. Will recheck the partition. Error: {}", partition_name, toString(piece_number), e.displayText());
        return false;
    }

@ -511,12 +504,12 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
    {
        if (zxid1[shard_num] != zxid2[shard_num])
        {
-            LOG_INFO(log, "The task " << piece_status_paths[shard_num] << " is being modified now. Partition piece will be rechecked");
+            LOG_INFO(log, "The task {} is being modified now. Partition piece will be rechecked", piece_status_paths[shard_num]);
            return false;
        }
    }

-    LOG_INFO(log, "Partition " << partition_name << " piece number " << toString(piece_number) << " is copied successfully");
+    LOG_INFO(log, "Partition {} piece number {} is copied successfully", partition_name, toString(piece_number));
    return true;
 }

@ -530,7 +523,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
        inject_fault = value < move_fault_probability;
    }

-    LOG_DEBUG(log, "Try to move  " << partition_name << " to destionation table");
+    LOG_DEBUG(log, "Try to move  {} to destionation table", partition_name);

    auto zookeeper = context.getZooKeeper();

@ -548,7 +541,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
    {
        if (e.code == Coordination::ZNODEEXISTS)
        {
-            LOG_DEBUG(log, "Someone is already moving pieces " << current_partition_attach_is_active);
+            LOG_DEBUG(log, "Someone is already moving pieces {}", current_partition_attach_is_active);
            return TaskStatus::Active;
        }

@ -565,16 +558,13 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
            TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data);
            if (status.state == TaskState::Finished)
            {
-                LOG_DEBUG(log, "All pieces for partition from this task " << current_partition_attach_is_active
-                                       << " has been successfully moved to destination table by " << status.owner);
+                LOG_DEBUG(log, "All pieces for partition from this task {} has been successfully moved to destination table by {}", current_partition_attach_is_active, status.owner);
                return TaskStatus::Finished;
            }

            /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process.
            /// Initialize DROP PARTITION
-            LOG_DEBUG(log, "Moving piece for partition " << current_partition_attach_is_active
-                                   << " has not been successfully finished by " << status.owner
-                                   << ". Will try to move by myself.");
+            LOG_DEBUG(log, "Moving piece for partition {} has not been successfully finished by {}. Will try to move by myself.", current_partition_attach_is_active, status.owner);

            /// Remove is_done marker.
            zookeeper->remove(current_partition_attach_is_done);
@ -591,9 +581,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
    /// Move partition to original destination table.
    for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number)
    {
-        LOG_DEBUG(log, "Trying to move partition " << partition_name
-                                                   << " piece " << toString(current_piece_number)
-                                                   << " to original table");
+        LOG_DEBUG(log, "Trying to move partition {} piece {} to original table", partition_name, toString(current_piece_number));

        ASTPtr query_alter_ast;
        String query_alter_ast_string;
@ -614,7 +602,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                                  " ATTACH PARTITION " + partition_name +
                                  " FROM " + getQuotedTable(helping_table);

-        LOG_DEBUG(log, "Executing ALTER query: " << query_alter_ast_string);
+        LOG_DEBUG(log, "Executing ALTER query: {}", query_alter_ast_string);

        try
        {
@ -626,13 +614,11 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                    PoolMode::GET_MANY,
                    ClusterExecutionMode::ON_EACH_NODE);

-            LOG_INFO(log, "Number of nodes that executed ALTER query successfully : " << toString(num_nodes));
+            LOG_INFO(log, "Number of nodes that executed ALTER query successfully : {}", toString(num_nodes));
        }
        catch (...)
        {
-            LOG_DEBUG(log, "Error while moving partition " << partition_name
-                                                           << " piece " << toString(current_piece_number)
-                                                           << "to original table");
+            LOG_DEBUG(log, "Error while moving partition {} piece {} to original table", partition_name, toString(current_piece_number));
            throw;
        }

@ -647,7 +633,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) +
                                                " PARTITION " + partition_name + " DEDUPLICATE;";

-                LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: " << query_alter_ast_string);
+                LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string);

                UInt64 num_nodes = executeQueryOnCluster(
                        task_table.cluster_push,
@ -656,14 +642,12 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                        &task_cluster->settings_push,
                        PoolMode::GET_MANY);

-                LOG_INFO(log, "Number of shard that executed OPTIMIZE DEDUPLICATE query successfully : "
-                        << toString(num_nodes));
+                LOG_INFO(log, "Number of shard that executed OPTIMIZE DEDUPLICATE query successfully : {}", toString(num_nodes));
            }
        }
        catch (...)
        {
-            LOG_DEBUG(log, "Error while executing OPTIMIZE DEDUPLICATE partition " << partition_name
-                                                                                   << "in the original table");
+            LOG_DEBUG(log, "Error while executing OPTIMIZE DEDUPLICATE partition {}in the original table", partition_name);
            throw;
        }
    }
@ -702,7 +686,7 @@ ASTPtr ClusterCopier::removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast

    auto new_columns_list = std::make_shared<ASTColumns>();
    new_columns_list->set(new_columns_list->columns, new_columns);
-    if (auto indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
+    if (const auto * indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
        new_columns_list->set(new_columns_list->indices, indices->clone());

    new_query.replace(new_query.columns_list, new_columns_list);
@ -759,8 +743,7 @@ bool ClusterCopier::tryDropPartitionPiece(
    {
        if (e.code == Coordination::ZNODEEXISTS)
        {
-            LOG_DEBUG(log, "Partition " << task_partition.name << " piece "
-                            << toString(current_piece_number) << " is cleaning now by somebody, sleep");
+            LOG_DEBUG(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number));
            std::this_thread::sleep_for(default_sleep_time);
            return false;
        }
@ -773,8 +756,7 @@ bool ClusterCopier::tryDropPartitionPiece(
    {
        if (stat.numChildren != 0)
        {
-            LOG_DEBUG(log, "Partition " << task_partition.name << " contains " << stat.numChildren
-                            << " active workers while trying to drop it. Going to sleep.");
+            LOG_DEBUG(log, "Partition {} contains {} active workers while trying to drop it. Going to sleep.", task_partition.name, stat.numChildren);
            std::this_thread::sleep_for(default_sleep_time);
            return false;
        }
@ -794,7 +776,7 @@ bool ClusterCopier::tryDropPartitionPiece(
        {
            if (e.code == Coordination::ZNODEEXISTS)
            {
-                LOG_DEBUG(log, "Partition " << task_partition.name << " is being filled now by somebody, sleep");
+                LOG_DEBUG(log, "Partition {} is being filled now by somebody, sleep", task_partition.name);
                return false;
            }

@ -832,7 +814,7 @@ bool ClusterCopier::tryDropPartitionPiece(
        /// It is important, DROP PARTITION must be done synchronously
        settings_push.replication_alter_partitions_sync = 2;

-        LOG_DEBUG(log, "Execute distributed DROP PARTITION: " << query);
+        LOG_DEBUG(log, "Execute distributed DROP PARTITION: {}", query);
        /// We have to drop partition_piece on each replica
        size_t num_shards = executeQueryOnCluster(
                cluster_push, query,
@ -841,7 +823,7 @@ bool ClusterCopier::tryDropPartitionPiece(
                PoolMode::GET_MANY,
                ClusterExecutionMode::ON_EACH_NODE);

-        LOG_INFO(log, "DROP PARTITION was successfully executed on " << num_shards << " nodes of a cluster.");
+        LOG_INFO(log, "DROP PARTITION was successfully executed on {} nodes of a cluster.", num_shards);

        /// Update the locking node
        if (!my_clock.is_stale())
@ -859,13 +841,12 @@ bool ClusterCopier::tryDropPartitionPiece(
            return false;
        }

-        LOG_INFO(log, "Partition " << task_partition.name <<  " piece " << toString(current_piece_number)
-                       << " was dropped on cluster " << task_table.cluster_push_name);
+        LOG_INFO(log, "Partition {} piece {} was dropped on cluster {}", task_partition.name, toString(current_piece_number), task_table.cluster_push_name);
        if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::ZNODEEXISTS)
            zookeeper->set(current_shards_path, host_id);
    }

-    LOG_INFO(log, "Partition " << task_partition.name <<  " piece " << toString(current_piece_number) << " is safe for work now.");
+    LOG_INFO(log, "Partition {} piece {} is safe for work now.", task_partition.name, toString(current_piece_number));
    return true;
 }

@ -889,7 +870,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab

        ++cluster_partition.total_tries;

-        LOG_DEBUG(log, "Processing partition " << partition_name << " for the whole cluster");
+        LOG_DEBUG(log, "Processing partition {} for the whole cluster", partition_name);

        /// Process each source shard having current partition and copy current partition
        /// NOTE: shards are sorted by "distance" to current host
@ -911,7 +892,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
                    {
                        const size_t number_of_splits = task_table.number_of_splits;
                        shard->partition_tasks.emplace(partition_name, ShardPartition(*shard, partition_name, number_of_splits));
-                        LOG_DEBUG(log, "Discovered partition " << partition_name << " in shard " << shard->getDescription());
+                        LOG_DEBUG(log, "Discovered partition {} in shard {}", partition_name, shard->getDescription());
                        /// To save references in the future.
                        auto shard_partition_it = shard->partition_tasks.find(partition_name);
                        PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces;
@ -924,7 +905,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
                    }
                    else
                    {
-                        LOG_DEBUG(log, "Found that shard " << shard->getDescription() << " does not contain current partition " << partition_name);
+                        LOG_DEBUG(log, "Found that shard {} does not contain current partition {}", shard->getDescription(), partition_name);
                        continue;
                    }
                }
@ -1030,21 +1011,20 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
            task_table.rows_copied += cluster_partition.rows_copied;
            double elapsed = cluster_partition.elapsed_time_seconds;

-            LOG_INFO(log, "It took " << std::fixed << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name
-                                     << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes"
-                                     << ", " << formatReadableQuantity(cluster_partition.rows_copied) << " rows"
-                                     << " and " << cluster_partition.blocks_copied << " source blocks are copied");
+            LOG_INFO(log, "It took {} seconds to copy partition {}: {} uncompressed bytes, {} rows and {} source blocks are copied",
+                elapsed, partition_name,
+                formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied),
+                formatReadableQuantity(cluster_partition.rows_copied),
+                cluster_partition.blocks_copied);

            if (cluster_partition.rows_copied)
            {
-                LOG_INFO(log, "Average partition speed: "
-                        << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed) << " per second.");
+                LOG_INFO(log, "Average partition speed: {} per second.", formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed));
            }

            if (task_table.rows_copied)
            {
-                LOG_INFO(log, "Average table " << task_table.table_id << " speed: "
-                                               << formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) << " per second.");
+                LOG_INFO(log, "Average table {} speed: {} per second.", task_table.table_id, formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed));
            }
        }
    }
@ -1055,8 +1035,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab

    if (!table_is_done)
    {
-        LOG_INFO(log, "Table " + task_table.table_id + " is not processed yet."
-                << "Copied " << finished_partitions << " of " << required_partitions << ", will retry");
+        LOG_INFO(log, "Table {} is not processed yet.Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions);
    }

    return table_is_done;
@ -1104,9 +1083,11 @@ TaskStatus ClusterCopier::iterateThroughAllPiecesInPartition(const ConnectionTim
    {
        for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num)
        {
-            LOG_INFO(log, "Attempt number " << try_num << " to process partition " << task_partition.name
-                          << " piece number " << piece_number << " on shard number " << task_partition.task_shard.numberInCluster()
-                          << " with index " << task_partition.task_shard.indexInCluster());
+            LOG_INFO(log, "Attempt number {} to process partition {} piece number {} on shard number {} with index {}.",
+                try_num, task_partition.name, piece_number,
+                task_partition.task_shard.numberInCluster(),
+                task_partition.task_shard.indexInCluster());
+
            res = processPartitionPieceTaskImpl(timeouts, task_partition, piece_number, is_unprioritized_task);

            /// Exit if success
@ -1210,7 +1191,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    /// Load balancing
    auto worker_node_holder = createTaskWorkerNodeAndWaitIfNeed(zookeeper, current_task_piece_status_path, is_unprioritized_task);

-    LOG_DEBUG(log, "Processing " << current_task_piece_status_path);
+    LOG_DEBUG(log, "Processing {}", current_task_piece_status_path);

    const String piece_status_path = partition_piece.getPartitionPieceShardsPath();

@ -1221,14 +1202,12 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    /// Do not start if partition piece is dirty, try to clean it
    if (is_clean)
    {
-        LOG_DEBUG(log, "Partition " << task_partition.name
-                        << " piece " + toString(current_piece_number) + " appears to be clean");
+        LOG_DEBUG(log, "Partition {} piece {} appears to be clean", task_partition.name, current_piece_number);
        zookeeper->createAncestors(current_task_piece_status_path);
    }
    else
    {
-        LOG_DEBUG(log, "Partition " << task_partition.name
-                        << " piece " + toString(current_piece_number) + " is dirty, try to drop it");
+        LOG_DEBUG(log, "Partition {} piece {} is dirty, try to drop it", task_partition.name, current_piece_number);

        try
        {
@ -1253,7 +1232,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
    {
        if (e.code == Coordination::ZNODEEXISTS)
        {
-            LOG_DEBUG(log, "Someone is already processing " << current_task_piece_is_active_path);
+            LOG_DEBUG(log, "Someone is already processing {}", current_task_piece_is_active_path);
            return TaskStatus::Active;
        }

@ -1269,16 +1248,13 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
            TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data);
            if (status.state == TaskState::Finished)
            {
-                LOG_DEBUG(log, "Task " << current_task_piece_status_path
-                                << " has been successfully executed by " << status.owner);
+                LOG_DEBUG(log, "Task {} has been successfully executed by {}", current_task_piece_status_path, status.owner);
                return TaskStatus::Finished;
            }

            /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process.
            /// Initialize DROP PARTITION
-            LOG_DEBUG(log, "Task " << current_task_piece_status_path
-                            << " has not been successfully finished by " << status.owner
-                            << ". Partition will be dropped and refilled.");
+            LOG_DEBUG(log, "Task {} has not been successfully finished by {}. Partition will be dropped and refilled.", current_task_piece_status_path, status.owner);

            create_is_dirty_node(clean_state_clock);
            return TaskStatus::Error;
@ -1293,11 +1269,9 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id);
        auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent);
        if (res == Coordination::ZNODEEXISTS)
-            LOG_DEBUG(log, "Partition " << task_partition.name << " piece "
-            + toString(current_piece_number) + " is absent on current replica of a shard. But other replicas have already marked it as done.");
+            LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number);
        if (res == Coordination::ZOK)
-            LOG_DEBUG(log, "Partition " << task_partition.name << " piece "
-            + toString(current_piece_number) + " is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.");
+            LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number);
        return TaskStatus::Finished;
    }

@ -1325,18 +1299,14 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(

        if (count != 0)
        {
-            LOG_INFO(log, "Partition " << task_partition.name << " piece "
-                          << current_piece_number << "is not empty. In contains " << count << " rows.");
+            LOG_INFO(log, "Partition {} piece {} is not empty. It contains {} rows.", task_partition.name, current_piece_number, count);
            Coordination::Stat stat_shards{};
            zookeeper->get(partition_piece.getPartitionPieceShardsPath(), &stat_shards);

            /// NOTE: partition is still fresh if dirt discovery happens before cleaning
            if (stat_shards.numChildren == 0)
            {
-                LOG_WARNING(log, "There are no workers for partition " << task_partition.name
-                                  << " piece " << toString(current_piece_number)
-                                  << ", but destination table contains " << count << " rows"
-                                  << ". Partition will be dropped and refilled.");
+                LOG_WARNING(log, "There are no workers for partition {} piece {}, but destination table contains {} rows. Partition will be dropped and refilled.", task_partition.name, current_piece_number, count);

                create_is_dirty_node(clean_state_clock);
                return TaskStatus::Error;
@ -1353,14 +1323,12 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
        if (clean_state_clock != new_clean_state_clock)
        {
-            LOG_INFO(log, "Partition " << task_partition.name << " piece "
-                          << toString(current_piece_number) << " clean state changed, cowardly bailing");
+            LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, current_piece_number);
            return TaskStatus::Error;
        }
        else if (!new_clean_state_clock.is_clean())
        {
-            LOG_INFO(log, "Partition " << task_partition.name << " piece "
-                          << toString(current_piece_number) << " is dirty and will be dropped and refilled");
+            LOG_INFO(log, "Partition {} piece {} is dirty and will be dropped and refilled", task_partition.name, current_piece_number);
            create_is_dirty_node(new_clean_state_clock);
            return TaskStatus::Error;
        }
@ -1387,12 +1355,11 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        create_query_push_ast->as<ASTCreateQuery &>().if_not_exists = true;
        String query = queryToString(create_query_push_ast);

-        LOG_DEBUG(log, "Create destination tables. Query: " << query);
+        LOG_DEBUG(log, "Create destination tables. Query: {}", query);
        UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query,
                                              create_query_push_ast, &task_cluster->settings_push,
                                              PoolMode::GET_MANY);
-        LOG_DEBUG(log, "Destination tables " << getQuotedTable(task_table.table_push)
-                        << " have been created on " << shards << " shards of " << task_table.cluster_push->getShardCount());
+        LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}", getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
    }

    /// Do the copying
@ -1407,8 +1374,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        // Select all fields
        ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", /*enable_splitting*/ true, inject_fault ? "1" : "");

-        LOG_DEBUG(log, "Executing SELECT query and pull from " << task_shard.getDescription()
-                                                               << " : " << queryToString(query_select_ast));
+        LOG_DEBUG(log, "Executing SELECT query and pull from {} : {}", task_shard.getDescription(), queryToString(query_select_ast));

        ASTPtr query_insert_ast;
        {
@ -1419,7 +1385,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
            const auto & settings = context.getSettingsRef();
            query_insert_ast = parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth);

-            LOG_DEBUG(log, "Executing INSERT query: " << query);
+            LOG_DEBUG(log, "Executing INSERT query: {}", query);
        }

        try
@ -1501,8 +1467,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        }
    }

-    LOG_INFO(log, "Partition " << task_partition.name << " piece "
-                               << toString(current_piece_number) << " copied. But not moved to original destination table.");
+    LOG_INFO(log, "Partition {} piece {} copied. But not moved to original destination table.", task_partition.name, current_piece_number);


    /// Try create original table (if not exists) on each shard
@ -1513,12 +1478,11 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        create_query_push_ast->as<ASTCreateQuery &>().if_not_exists = true;
        String query = queryToString(create_query_push_ast);

-        LOG_DEBUG(log, "Create destination tables. Query: " << query);
+        LOG_DEBUG(log, "Create destination tables. Query: {}", query);
        UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query,
                                              create_query_push_ast, &task_cluster->settings_push,
                                              PoolMode::GET_MANY);
-        LOG_DEBUG(log, "Destination tables " << getQuotedTable(task_table.table_push)
-                                             << " have been created on " << shards << " shards of " << task_table.cluster_push->getShardCount());
+        LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}", getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
    }
    catch (...)
    {
@ -1531,14 +1495,12 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
        CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
        if (clean_state_clock != new_clean_state_clock)
        {
-            LOG_INFO(log, "Partition " << task_partition.name << " piece "
-                           << toString(current_piece_number) <<  " clean state changed, cowardly bailing");
+            LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, current_piece_number);
            return TaskStatus::Error;
        }
        else if (!new_clean_state_clock.is_clean())
        {
-            LOG_INFO(log, "Partition " << task_partition.name << " piece "
-                           << toString(current_piece_number) << " became dirty and will be dropped and refilled");
+            LOG_INFO(log, "Partition {} piece {} became dirty and will be dropped and refilled", task_partition.name, current_piece_number);
            create_is_dirty_node(new_clean_state_clock);
            return TaskStatus::Error;
        }
@ -1582,7 +1544,7 @@ void ClusterCopier::dropHelpingTables(const TaskTable & task_table)
        const ClusterPtr & cluster_push = task_table.cluster_push;
        Settings settings_push = task_cluster->settings_push;

-        LOG_DEBUG(log, "Execute distributed DROP TABLE: " << query);
+        LOG_DEBUG(log, "Execute distributed DROP TABLE: {}", query);
        /// We have to drop partition_piece on each replica
        UInt64 num_nodes = executeQueryOnCluster(
                cluster_push, query,
@ -1591,7 +1553,7 @@ void ClusterCopier::dropHelpingTables(const TaskTable & task_table)
                PoolMode::GET_MANY,
                ClusterExecutionMode::ON_EACH_NODE);

-        LOG_DEBUG(log, "DROP TABLE query was successfully executed on " << toString(num_nodes) << " nodes.");
+        LOG_DEBUG(log, "DROP TABLE query was successfully executed on {} nodes.", num_nodes);
    }
 }

@ -1609,7 +1571,7 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
        const ClusterPtr & cluster_push = task_table.cluster_push;
        Settings settings_push = task_cluster->settings_push;

-        LOG_DEBUG(log, "Execute distributed DROP PARTITION: " << query);
+        LOG_DEBUG(log, "Execute distributed DROP PARTITION: {}", query);
        /// We have to drop partition_piece on each replica
        UInt64 num_nodes = executeQueryOnCluster(
                cluster_push, query,
@ -1618,9 +1580,9 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
                PoolMode::GET_MANY,
                ClusterExecutionMode::ON_EACH_NODE);

-        LOG_DEBUG(log, "DROP PARTITION query was successfully executed on " << toString(num_nodes) << " nodes.");
+        LOG_DEBUG(log, "DROP PARTITION query was successfully executed on {} nodes.", num_nodes);
    }
-    LOG_DEBUG(log, "All helping tables dropped partition " << partition_name);
+    LOG_DEBUG(log, "All helping tables dropped partition {}", partition_name);
 }

 String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings * settings)
@ -1724,7 +1686,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
    const auto & settings = context.getSettingsRef();
    ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth);

-    LOG_DEBUG(log, "Computing destination partition set, executing query: " << query);
+    LOG_DEBUG(log, "Computing destination partition set, executing query: {}", query);

    Context local_context = context;
    local_context.setSettings(task_cluster->settings_pull);
@ -1744,7 +1706,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
        }
    }

-    LOG_DEBUG(log, "There are " << res.size() << " destination partitions in shard " << task_shard.getDescription());
+    LOG_DEBUG(log, "There are {} destination partitions in shard {}", res.size(), task_shard.getDescription());

    return res;
 }
@ -1765,8 +1727,7 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts,

    query += " LIMIT 1";

-    LOG_DEBUG(log, "Checking shard " << task_shard.getDescription() << " for partition "
-                                     << partition_quoted_name << " existence, executing query: " << query);
+    LOG_DEBUG(log, "Checking shard {} for partition {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, query);

    ParserQuery parser_query(query.data() + query.size());
 const auto & settings = context.getSettingsRef();
@ -1805,9 +1766,7 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi

    query += " LIMIT 1";

-    LOG_DEBUG(log, "Checking shard " << task_shard.getDescription() << " for partition "
-                   << partition_quoted_name << " piece " << std::to_string(current_piece_number)
-                   << "existence, executing query: " << query);
+    LOG_DEBUG(log, "Checking shard {} for partition {} piece {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, std::to_string(current_piece_number), query);

    ParserQuery parser_query(query.data() + query.size());
    const auto & settings = context.getSettingsRef();
@ -1817,11 +1776,9 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi
    local_context.setSettings(task_cluster->settings_pull);
    auto result = InterpreterFactory::get(query_ast, local_context)->execute().in->read().rows();
    if (result != 0)
-        LOG_DEBUG(log, "Partition " << partition_quoted_name << " piece number "
-                       << std::to_string(current_piece_number) << " is PRESENT on shard " << task_shard.getDescription());
+        LOG_DEBUG(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
    else
-        LOG_DEBUG(log, "Partition " << partition_quoted_name << " piece number "
-                       << std::to_string(current_piece_number) << " is ABSENT on shard " << task_shard.getDescription());
+        LOG_DEBUG(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
    return result != 0;
 }

@ -1938,8 +1895,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(

    if (execution_mode == ClusterExecutionMode::ON_EACH_NODE && successful_nodes != origin_replicas_number)
    {
-        LOG_INFO(log, "There was an error while executing ALTER on each node. Query was executed on "
-                << toString(successful_nodes) << " nodes. But had to be executed on " << toString(origin_replicas_number.load()));
+        LOG_INFO(log, "There was an error while executing ALTER on each node. Query was executed on {} nodes. But had to be executed on {}", toString(successful_nodes), toString(origin_replicas_number.load()));
    }


--- a/programs/copier/ClusterCopierApp.cpp
+++ b/programs/copier/ClusterCopierApp.cpp
@ -94,12 +94,8 @@ void ClusterCopierApp::mainImpl()
    StatusFile status_file(process_path + "/status");
    ThreadStatus thread_status;

-    auto log = &logger();
-    LOG_INFO(log, "Starting clickhouse-copier ("
-        << "id " << process_id << ", "
-        << "host_id " << host_id << ", "
-        << "path " << process_path << ", "
-        << "revision " << ClickHouseRevision::get() << ")");
+    auto * log = &logger();
+    LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::get());

    SharedContextHolder shared_context = Context::createShared();
    auto context = std::make_unique<Context>(Context::createGlobal(shared_context.get()));
--- a/programs/copier/Internals.cpp
+++ b/programs/copier/Internals.cpp
@ -260,7 +260,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
        return res;

    res.is_remote = 1;
-    for (auto & replica : replicas)
+    for (const auto & replica : replicas)
    {
        if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
        {
@ -270,7 +270,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
    }

    res.hostname_difference = std::numeric_limits<size_t>::max();
-    for (auto & replica : replicas)
+    for (const auto & replica : replicas)
    {
        size_t difference = getHostNameDifference(local_hostname, replica.host_name);
        res.hostname_difference = std::min(difference, res.hostname_difference);
--- a/programs/copier/ZooKeeperStaff.h
+++ b/programs/copier/ZooKeeperStaff.h
@ -183,11 +183,11 @@ public:
                        switch (rsp.type)
                        {
                            case Coordination::CREATED:
-                                LOG_DEBUG(logger, "CleanStateClock change: CREATED, at " << rsp.path);
+                                LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path);
                                stale->store(true);
                                break;
                            case Coordination::CHANGED:
-                                LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at" << rsp.path);
+                                LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path);
                                stale->store(true);
                        }
                    }
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@ -8,7 +8,6 @@
 #include <Poco/NullChannel.h>
 #include <Databases/DatabaseMemory.h>
 #include <Storages/System/attachSystemTables.h>
-#include <Interpreters/Context.h>
 #include <Interpreters/ProcessList.h>
 #include <Interpreters/executeQuery.h>
 #include <Interpreters/loadMetadata.h>
@ -212,7 +211,7 @@ try
        /// Lock path directory before read
        status.emplace(context->getPath() + "status");

-        LOG_DEBUG(log, "Loading metadata from " << context->getPath());
+        LOG_DEBUG(log, "Loading metadata from {}", context->getPath());
        loadMetadataSystem(*context);
        attachSystemTables();
        loadMetadata(*context);
--- a/programs/local/LocalServer.h
+++ b/programs/local/LocalServer.h
@ -4,13 +4,12 @@
 #include <Poco/Util/Application.h>
 #include <memory>
 #include <loggers/Loggers.h>
+#include <Interpreters/Context.h>


 namespace DB
 {

-class Context;
-
 /// Lightweight Application for clickhouse-local
 /// No networking, no extra configs and working directories, no pid and status files, no dictionaries, no logging.
 /// Quiet mode by default
--- a/programs/obfuscator/Obfuscator.cpp
+++ b/programs/obfuscator/Obfuscator.cpp
@ -937,10 +937,10 @@ public:
        if (typeid_cast<const DataTypeFixedString *>(&data_type))
            return std::make_unique<FixedStringModel>(seed);

-        if (auto type = typeid_cast<const DataTypeArray *>(&data_type))
+        if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
            return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));

-        if (auto type = typeid_cast<const DataTypeNullable *>(&data_type))
+        if (const auto * type = typeid_cast<const DataTypeNullable *>(&data_type))
            return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params));

        throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED);
--- a/programs/odbc-bridge/ColumnInfoHandler.cpp
+++ b/programs/odbc-bridge/ColumnInfoHandler.cpp
@ -62,7 +62,7 @@ namespace
 void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
 {
    Poco::Net::HTMLForm params(request, request.stream());
-    LOG_TRACE(log, "Request URI: " + request.getURI());
+    LOG_TRACE(log, "Request URI: {}", request.getURI());

    auto process_error = [&response, this](const std::string & message)
    {
@ -89,11 +89,11 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
    if (params.has("schema"))
    {
        schema_name = params.get("schema");
-        LOG_TRACE(log, "Will fetch info for table '" << schema_name + "." + table_name << "'");
+        LOG_TRACE(log, "Will fetch info for table '{}'", schema_name + "." + table_name);
    }
    else
-        LOG_TRACE(log, "Will fetch info for table '" << table_name << "'");
-    LOG_TRACE(log, "Got connection str '" << connection_string << "'");
+        LOG_TRACE(log, "Will fetch info for table '{}'", table_name);
+    LOG_TRACE(log, "Got connection str '{}'", connection_string);

    try
    {
@ -124,7 +124,7 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
        select->format(settings);
        std::string query = ss.str();

-        LOG_TRACE(log, "Inferring structure with query '" << query << "'");
+        LOG_TRACE(log, "Inferring structure with query '{}'", query);

        if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLPrepare(hstmt, reinterpret_cast<SQLCHAR *>(query.data()), query.size())))
            throw POCO_SQL_ODBC_CLASS::DescriptorException(session.dbc());
--- a/programs/odbc-bridge/HandlerFactory.cpp
+++ b/programs/odbc-bridge/HandlerFactory.cpp
@ -10,7 +10,7 @@ namespace DB
 Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
 {
    Poco::URI uri{request.getURI()};
-    LOG_TRACE(log, "Request URI: " + uri.toString());
+    LOG_TRACE(log, "Request URI: {}", uri.toString());

    if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
        return new PingHandler(keep_alive_timeout);
--- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp
+++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp
@ -25,7 +25,7 @@ namespace DB
 void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
 {
    Poco::Net::HTMLForm params(request, request.stream());
-    LOG_TRACE(log, "Request URI: " + request.getURI());
+    LOG_TRACE(log, "Request URI: {}", request.getURI());

    auto process_error = [&response, this](const std::string & message)
    {
--- a/programs/odbc-bridge/MainHandler.cpp
+++ b/programs/odbc-bridge/MainHandler.cpp
@ -84,7 +84,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
    Poco::Net::HTMLForm params(request);
    if (mode == "read")
        params.read(request.stream());
-    LOG_TRACE(log, "Request URI: " + request.getURI());
+    LOG_TRACE(log, "Request URI: {}", request.getURI());

    if (mode == "read" && !params.has("query"))
    {
@ -132,7 +132,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
    std::string format = params.get("format", "RowBinary");

    std::string connection_string = params.get("connection_string");
-    LOG_TRACE(log, "Connection string: '" << connection_string << "'");
+    LOG_TRACE(log, "Connection string: '{}'", connection_string);

    WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);

@ -152,7 +152,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
            }
            std::string db_name = params.get("db_name");
            std::string table_name = params.get("table_name");
-            LOG_TRACE(log, "DB name: '" << db_name << "', table name: '" << table_name << "'");
+            LOG_TRACE(log, "DB name: '{}', table name: '{}'", db_name, table_name);

            auto quoting_style = IdentifierQuotingStyle::None;
 #if USE_ODBC
@ -171,7 +171,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
        else
        {
            std::string query = params.get("query");
-            LOG_TRACE(log, "Query: " << query);
+            LOG_TRACE(log, "Query: {}", query);

            BlockOutputStreamPtr writer = FormatFactory::instance().getOutput(format, out, *sample_block, context);
            auto pool = getPool(connection_string);
--- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp
+++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp
@ -24,8 +24,8 @@ namespace
        query.table_id.table_name = table_name;
        query.columns = std::make_shared<ASTExpressionList>(',');
        query.children.push_back(query.columns);
-        for (size_t i = 0; i < columns.size(); ++i)
-            query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(columns[i].name));
+        for (const auto & column : columns)
+            query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));

        std::stringstream ss;
        IAST::FormatSettings settings(ss, true);
--- a/programs/odbc-bridge/ODBCBridge.cpp
+++ b/programs/odbc-bridge/ODBCBridge.cpp
@ -48,12 +48,7 @@ namespace
 #endif
            )
            {
-                LOG_ERROR(log,
-                    "Cannot resolve listen_host (" << host << "), error " << e.code() << ": " << e.message()
-                                                   << ". "
-                                                      "If it is an IPv6 address and your host has disabled IPv6, then consider to "
-                                                      "specify IPv4 address to listen in <listen_host> element of configuration "
-                                                      "file. Example: <listen_host>0.0.0.0</listen_host>");
+                LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. If it is an IPv6 address and your host has disabled IPv6, then consider to specify IPv4 address to listen in <listen_host> element of configuration file. Example: <listen_host>0.0.0.0</listen_host>", host, e.code(), e.message());
            }

            throw;
@ -188,7 +183,7 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
        new HandlerFactory("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), server_pool, socket, http_params);
    server.start();

-    LOG_INFO(log, "Listening http://" + address.toString());
+    LOG_INFO(log, "Listening http://{}", address.toString());

    SCOPE_EXIT({
        LOG_DEBUG(log, "Received termination signal.");
@ -198,7 +193,7 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
        {
            if (server.currentConnections() == 0)
                break;
-            LOG_DEBUG(log, "Waiting for " << server.currentConnections() << " connections, try " << count);
+            LOG_DEBUG(log, "Waiting for {} connections, try {}", server.currentConnections(), count);
            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
        }
    });
--- a/programs/server/HTTPHandler.cpp
+++ b/programs/server/HTTPHandler.cpp
@ -195,7 +195,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output)
    std::vector<ReadBufferPtr> read_buffers;
    std::vector<ReadBuffer *> read_buffers_raw_ptr;

-    auto cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
+    auto * cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
    if (!cascade_buffer)
        throw Exception("Expected CascadeWriteBuffer", ErrorCodes::LOGICAL_ERROR);

@ -241,7 +241,7 @@ void HTTPHandler::processQuery(

    CurrentThread::QueryScope query_scope(context);

-    LOG_TRACE(log, "Request URI: " << request.getURI());
+    LOG_TRACE(log, "Request URI: {}", request.getURI());

    std::istream & istr = request.stream();

@ -383,7 +383,7 @@ void HTTPHandler::processQuery(
        {
            auto push_memory_buffer_and_continue = [next_buffer = used_output.out_maybe_compressed] (const WriteBufferPtr & prev_buf)
            {
-                auto prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
+                auto * prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
                if (!prev_memory_buffer)
                    throw Exception("Expected MemoryWriteBuffer", ErrorCodes::LOGICAL_ERROR);

--- a/programs/server/HTTPHandlerFactory.cpp
+++ b/programs/server/HTTPHandlerFactory.cpp
@ -28,19 +28,16 @@ HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string &
 {
 }

-Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request) // override
+Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
 {
-    LOG_TRACE(log, "HTTP Request for " << name << ". "
-        << "Method: " << request.getMethod()
-        << ", Address: " << request.clientAddress().toString()
-        << ", User-Agent: " << (request.has("User-Agent") ? request.get("User-Agent") : "none")
-        << (request.hasContentLength() ? (", Length: " + std::to_string(request.getContentLength())) : (""))
-        << ", Content Type: " << request.getContentType()
-        << ", Transfer Encoding: " << request.getTransferEncoding());
+    LOG_TRACE(log, "HTTP Request for {}. Method: {}, Address: {}, User-Agent: {}{}, Content Type: {}, Transfer Encoding: {}",
+        name, request.getMethod(), request.clientAddress().toString(), request.has("User-Agent") ? request.get("User-Agent") : "none",
+        (request.hasContentLength() ? (", Length: " + std::to_string(request.getContentLength())) : ("")),
+        request.getContentType(), request.getTransferEncoding());

    for (auto & handler_factory : child_factories)
    {
-        auto handler = handler_factory->createRequestHandler(request);
+        auto * handler = handler_factory->createRequestHandler(request);
        if (handler != nullptr)
            return handler;
    }
@ -72,80 +69,98 @@ HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler

 static inline auto createHandlersFactoryFromConfig(IServer & server, const std::string & name, const String & prefix)
 {
-    auto main_handler_factory = new HTTPRequestHandlerFactoryMain(name);
+    auto main_handler_factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);  /// Freed automatically if a check below throws.

-    try
+    Poco::Util::AbstractConfiguration::Keys keys;
+    server.config().keys(prefix, keys);
+
+    for (const auto & key : keys)
    {
-        Poco::Util::AbstractConfiguration::Keys keys;
-        server.config().keys(prefix, keys);
+        if (!startsWith(key, "rule"))
+            throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);

-        for (const auto & key : keys)
-        {
-            if (!startsWith(key, "rule"))
-                throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
+        const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", "");

-            const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", "");
-
-            if (handler_type == "static")
-                main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key));
-            else if (handler_type == "dynamic_query_handler")
-                main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key));
-            else if (handler_type == "predefined_query_handler")
-                main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key));
-            else if (handler_type.empty())
-                throw Exception("Handler type in config is not specified here: " +
-                                prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
-            else
-                throw Exception("Unknown handler type '" + handler_type +"' in config here: " +
-                                prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER);
-        }
-
-        return main_handler_factory;
-    }
-    catch (...)
-    {
-        delete main_handler_factory;
-        throw;
+        if (handler_type == "static")
+            main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key));
+        else if (handler_type == "dynamic_query_handler")
+            main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key));
+        else if (handler_type == "predefined_query_handler")
+            main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key));
+        else if (handler_type.empty())
+            throw Exception("Handler type in config is not specified here: " +
+                            prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
+        else
+            throw Exception("Unknown handler type '" + handler_type +"' in config here: " +
+                            prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER);
    }
+
+    return main_handler_factory.release();  /// The caller receives the raw pointer; the unique_ptr gives up ownership.
 }

 static const auto ping_response_expression = "Ok.\n";
 static const auto root_response_expression = "config://http_server_default_response";

-static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
+static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(
+    IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
 {
    if (server.config().has("http_handlers"))
        return createHandlersFactoryFromConfig(server, name, "http_handlers");
    else
    {
-        auto factory = (new HTTPRequestHandlerFactoryMain(name))
-            ->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
-                ->attachStrictPath("/")->allowGetAndHeadRequest())
-            ->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
-                ->attachStrictPath("/ping")->allowGetAndHeadRequest())
-            ->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
-                ->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
-            ->addHandler((new HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>(server, "query"))->allowPostAndGetParamsRequest());
+        auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);  /// Held by unique_ptr until it is returned below.

+        auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
+        root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
+        factory->addHandler(root_handler.release());  /// NOTE(review): assumes addHandler() takes ownership of the pointer - confirm.
+
+        auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
+        ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
+        factory->addHandler(ping_handler.release());
+
+        auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
+        replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
+        factory->addHandler(replicas_status_handler.release());
+
+        auto query_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(server, "query");
+        query_handler->allowPostAndGetParamsRequest();
+        factory->addHandler(query_handler.release());
+
+        /// We check that prometheus handler will be served on current (default) port.
+        /// Otherwise it will be created separately, see below.
        if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0)
-            factory->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
-                server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
-                    ->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
+        {
+            auto prometheus_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
+                server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
+            prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
+            factory->addHandler(prometheus_handler.release());
+        }

-        return factory;
+        return factory.release();  /// The raw pointer goes to the caller; the unique_ptr gives up ownership.
    }
 }

 static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name)
 {
-    return (new HTTPRequestHandlerFactoryMain(name))
-        ->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
-            ->attachStrictPath("/")->allowGetAndHeadRequest())
-        ->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
-            ->attachStrictPath("/ping")->allowGetAndHeadRequest())
-        ->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
-            ->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
-        ->addHandler((new HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>(server))->allowPostAndGetParamsRequest());
+    auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);  /// Held by unique_ptr until it is returned below.
+
+    auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
+    root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
+    factory->addHandler(root_handler.release());  /// NOTE(review): assumes addHandler() takes ownership of the pointer - confirm.
+
+    auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
+    ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
+    factory->addHandler(ping_handler.release());
+
+    auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
+    replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
+    factory->addHandler(replicas_status_handler.release());
+
+    auto main_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>>(server);
+    main_handler->allowPostAndGetParamsRequest();
+    factory->addHandler(main_handler.release());
+
+    return factory.release();  /// The raw pointer goes to the caller; the unique_ptr gives up ownership.
 }

 Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name)
@ -155,9 +170,14 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As
    else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory")
        return createInterserverHTTPHandlerFactory(server, name);
    else if (name == "PrometheusHandler-factory")
-        return (new HTTPRequestHandlerFactoryMain(name))->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
-            server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
-                ->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
+    {
+        auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
+        auto handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
+            server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
+        handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
+        factory->addHandler(handler.release());
+        return factory.release();
+    }

    throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR);
 }
--- a/programs/server/InterserverIOHTTPHandler.cpp
+++ b/programs/server/InterserverIOHTTPHandler.cpp
@ -53,7 +53,7 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque
 {
    HTMLForm params(request);

-    LOG_TRACE(log, "Request URI: " << request.getURI());
+    LOG_TRACE(log, "Request URI: {}", request.getURI());

    String endpoint_name = params.get("endpoint");
    bool compress = params.get("compress") == "true";
@ -103,7 +103,7 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ
            response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED);
            if (!response.sent())
                writeString(message, *used_output.out);
-            LOG_WARNING(log, "Query processing failed request: '" << request.getURI() << "' authentication failed");
+            LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI());
        }
    }
    catch (Exception & e)
--- a/programs/server/MySQLHandler.cpp
+++ b/programs/server/MySQLHandler.cpp
@ -83,19 +83,15 @@ void MySQLHandler::run()
        if (!connection_context.mysql.max_packet_size)
            connection_context.mysql.max_packet_size = MAX_PACKET_LENGTH;

-        LOG_TRACE(log, "Capabilities: " << handshake_response.capability_flags
-                                        << ", max_packet_size: "
-                                        << handshake_response.max_packet_size
-                                        << ", character_set: "
-                                        << static_cast<int>(handshake_response.character_set)
-                                        << ", user: "
-                                        << handshake_response.username
-                                        << ", auth_response length: "
-                                        << handshake_response.auth_response.length()
-                                        << ", database: "
-                                        << handshake_response.database
-                                        << ", auth_plugin_name: "
-                                        << handshake_response.auth_plugin_name);
+        LOG_TRACE(log,
+            "Capabilities: {}, max_packet_size: {}, character_set: {}, user: {}, auth_response length: {}, database: {}, auth_plugin_name: {}",
+            handshake_response.capability_flags,
+            handshake_response.max_packet_size,
+            static_cast<int>(handshake_response.character_set),
+            handshake_response.username,
+            handshake_response.auth_response.length(),
+            handshake_response.database,
+            handshake_response.auth_plugin_name);

        client_capability_flags = handshake_response.capability_flags;
        if (!(client_capability_flags & CLIENT_PROTOCOL_41))
@ -129,7 +125,9 @@ void MySQLHandler::run()
            // For commands which are executed without MemoryTracker.
            LimitReadBuffer limited_payload(payload, 10000, true, "too long MySQL packet.");

-            LOG_DEBUG(log, "Received command: " << static_cast<int>(static_cast<unsigned char>(command)) << ". Connection id: " << connection_id << ".");
+            LOG_DEBUG(log, "Received command: {}. Connection id: {}.",
+                static_cast<int>(static_cast<unsigned char>(command)), connection_id);
+
            try
            {
                switch (command)
@ -197,7 +195,7 @@ void MySQLHandler::finishHandshake(MySQLProtocol::HandshakeResponse & packet)
    read_bytes(3); /// We can find out whether it is SSLRequest of HandshakeResponse by first 3 bytes.

    size_t payload_size = unalignedLoad<uint32_t>(buf) & 0xFFFFFFu;
-    LOG_TRACE(log, "payload size: " << payload_size);
+    LOG_TRACE(log, "payload size: {}", payload_size);

    if (payload_size == SSL_REQUEST_PAYLOAD_SIZE)
    {
@ -234,18 +232,18 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl
    }
    catch (const Exception & exc)
    {
-        LOG_ERROR(log, "Authentication for user " << user_name << " failed.");
+        LOG_ERROR(log, "Authentication for user {} failed.", user_name);
        packet_sender->sendPacket(ERR_Packet(exc.code(), "00000", exc.message()), true);
        throw;
    }
-    LOG_INFO(log, "Authentication for user " << user_name << " succeeded.");
+    LOG_INFO(log, "Authentication for user {} succeeded.", user_name);
 }

 void MySQLHandler::comInitDB(ReadBuffer & payload)
 {
    String database;
    readStringUntilEOF(database, payload);
-    LOG_DEBUG(log, "Setting current database to " << database);
+    LOG_DEBUG(log, "Setting current database to {}", database);
    connection_context.setCurrentDatabase(database);
    packet_sender->sendPacket(OK_Packet(0, client_capability_flags, 0, 0, 1), true);
 }
--- a/programs/server/MySQLHandlerFactory.cpp
+++ b/programs/server/MySQLHandlerFactory.cpp
@ -32,7 +32,7 @@ MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_)
    }
    catch (...)
    {
-        LOG_TRACE(log, "Failed to create SSL context. SSL will be disabled. Error: " << getCurrentExceptionMessage(false));
+        LOG_TRACE(log, "Failed to create SSL context. SSL will be disabled. Error: {}", getCurrentExceptionMessage(false));
        ssl_enabled = false;
    }

@ -43,7 +43,7 @@ MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_)
    }
    catch (...)
    {
-        LOG_TRACE(log, "Failed to read RSA key pair from server certificate. Error: " << getCurrentExceptionMessage(false));
+        LOG_TRACE(log, "Failed to read RSA key pair from server certificate. Error: {}", getCurrentExceptionMessage(false));
        generateRSAKeys();
    }
 #endif
@ -122,7 +122,7 @@ void MySQLHandlerFactory::generateRSAKeys()
 Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket)
 {
    size_t connection_id = last_connection_id++;
-    LOG_TRACE(log, "MySQL connection. Id: " << connection_id << ". Address: " << socket.peerAddress().toString());
+    LOG_TRACE(log, "MySQL connection. Id: {}. Address: {}", connection_id, socket.peerAddress().toString());
 #if USE_SSL
    return new MySQLHandlerSSL(server, socket, ssl_enabled, connection_id, *public_key, *private_key);
 #else
--- a/programs/server/ReplicasStatusHandler.cpp
+++ b/programs/server/ReplicasStatusHandler.cpp
@ -46,7 +46,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request

            for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next())
            {
-                auto & table = iterator->table();
+                const auto & table = iterator->table();
                StorageReplicatedMergeTree * table_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get());

                if (!table_replicated)
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -91,7 +91,7 @@ namespace

 void setupTmpPath(Logger * log, const std::string & path)
 {
-    LOG_DEBUG(log, "Setting up " << path << " to store temporary data in it");
+    LOG_DEBUG(log, "Setting up {} to store temporary data in it", path);

    Poco::File(path).createDirectories();

@ -101,11 +101,11 @@ void setupTmpPath(Logger * log, const std::string & path)
    {
        if (it->isFile() && startsWith(it.name(), "tmp"))
        {
-            LOG_DEBUG(log, "Removing old temporary file " << it->path());
+            LOG_DEBUG(log, "Removing old temporary file {}", it->path());
            it->remove();
        }
        else
-            LOG_DEBUG(log, "Skipped file in temporary path " << it->path());
+            LOG_DEBUG(log, "Skipped file in temporary path {}", it->path());
    }
 }

@ -276,7 +276,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
            {
                LOG_TRACE(log, "Will mlockall to prevent executable memory from being paged out. It may take a few seconds.");
                if (0 != mlockall(MCL_CURRENT))
-                    LOG_WARNING(log, "Failed mlockall: " + errnoToString(ErrorCodes::SYSTEM_ERROR));
+                    LOG_WARNING(log, "Failed mlockall: {}", errnoToString(ErrorCodes::SYSTEM_ERROR));
                else
                    LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed");
            }
@ -284,8 +284,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
            {
                LOG_INFO(log, "It looks like the process has no CAP_IPC_LOCK capability, binary mlock will be disabled."
                    " It could happen due to incorrect ClickHouse package installation."
-                    " You could resolve the problem manually with 'sudo setcap cap_ipc_lock=+ep " << executable_path << "'."
-                    " Note that it will not work on 'nosuid' mounted filesystems.");
+                    " You could resolve the problem manually with 'sudo setcap cap_ipc_lock=+ep {}'."
+                    " Note that it will not work on 'nosuid' mounted filesystems.", executable_path);
            }
        }
    }
@ -349,7 +349,7 @@ int Server::main(const std::vector<std::string> & /*args*/)

        if (rlim.rlim_cur == rlim.rlim_max)
        {
-            LOG_DEBUG(log, "rlimit on number of file descriptors is " << rlim.rlim_cur);
+            LOG_DEBUG(log, "rlimit on number of file descriptors is {}", rlim.rlim_cur);
        }
        else
        {
@ -357,12 +357,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
            rlim.rlim_cur = config().getUInt("max_open_files", rlim.rlim_max);
            int rc = setrlimit(RLIMIT_NOFILE, &rlim);
            if (rc != 0)
-                LOG_WARNING(log,
-                    "Cannot set max number of file descriptors to " << rlim.rlim_cur
-                        << ". Try to specify max_open_files according to your system limits. error: "
-                        << strerror(errno));
+                LOG_WARNING(log, "Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, strerror(errno));
            else
-                LOG_DEBUG(log, "Set max number of file descriptors to " << rlim.rlim_cur << " (was " << old << ").");
+                LOG_DEBUG(log, "Set max number of file descriptors to {} (was {}).", rlim.rlim_cur, old);
        }
    }

@ -372,7 +369,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
    /// Initialize DateLUT early, to not interfere with running time of first query.
    LOG_DEBUG(log, "Initializing DateLUT.");
    DateLUT::instance();
-    LOG_TRACE(log, "Initialized DateLUT with time zone '" << DateLUT::instance().getTimeZone() << "'.");
+    LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());


    /// Storage with temporary data for processing of heavy queries.
@ -431,9 +428,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
            if (this_host.empty())
            {
                this_host = getFQDNOrHostName();
-                LOG_DEBUG(log,
-                    "Configuration parameter '" + String(host_tag) + "' doesn't exist or exists and empty. Will use '" + this_host
-                        + "' as replica host.");
+                LOG_DEBUG(log, "Configuration parameter '{}' doesn't exist or exists and empty. Will use '{}' as replica host.",
+                    host_tag, this_host);
            }

            String port_str = config().getString(port_tag);
@ -538,8 +534,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
    if (uncompressed_cache_size > max_cache_size)
    {
        uncompressed_cache_size = max_cache_size;
-        LOG_INFO(log, "Uncompressed cache size was lowered to " << formatReadableSizeWithBinarySuffix(uncompressed_cache_size)
-            << " because the system has low amount of memory");
+        LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
    }
    global_context->setUncompressedCache(uncompressed_cache_size);

@ -554,8 +549,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
    if (mark_cache_size > max_cache_size)
    {
        mark_cache_size = max_cache_size;
-        LOG_INFO(log, "Mark cache size was lowered to " << formatReadableSizeWithBinarySuffix(uncompressed_cache_size)
-            << " because the system has low amount of memory");
+        LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(mark_cache_size));
    }
    global_context->setMarkCache(mark_cache_size);

@ -579,20 +573,19 @@ int Server::main(const std::vector<std::string> & /*args*/)
    if (max_server_memory_usage == 0)
    {
        max_server_memory_usage = default_max_server_memory_usage;
-        LOG_INFO(log, "Setting max_server_memory_usage was set to " << formatReadableSizeWithBinarySuffix(max_server_memory_usage));
+        LOG_INFO(log, "Setting max_server_memory_usage was set to {}", formatReadableSizeWithBinarySuffix(max_server_memory_usage));
    }
    else if (max_server_memory_usage > default_max_server_memory_usage)
    {
        max_server_memory_usage = default_max_server_memory_usage;
-        LOG_INFO(log, "Setting max_server_memory_usage was lowered to " << formatReadableSizeWithBinarySuffix(max_server_memory_usage)
-            << " because the system has low amount of memory");
+        LOG_INFO(log, "Setting max_server_memory_usage was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(max_server_memory_usage));
    }

    total_memory_tracker.setOrRaiseHardLimit(max_server_memory_usage);
    total_memory_tracker.setDescription("(total)");
    total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);

-    LOG_INFO(log, "Loading metadata from " + path);
+    LOG_INFO(log, "Loading metadata from {}", path);

    try
    {
@ -694,17 +687,19 @@ int Server::main(const std::vector<std::string> & /*args*/)
            " neither clickhouse-server process has CAP_NET_ADMIN capability."
            " 'taskstats' performance statistics will be disabled."
            " It could happen due to incorrect ClickHouse package installation."
-            " You can try to resolve the problem manually with 'sudo setcap cap_net_admin=+ep " << executable_path << "'."
+            " You can try to resolve the problem manually with 'sudo setcap cap_net_admin=+ep {}'."
            " Note that it will not work on 'nosuid' mounted filesystems."
-            " It also doesn't work if you run clickhouse-server inside network namespace as it happens in some containers.");
+            " It also doesn't work if you run clickhouse-server inside network namespace as it happens in some containers.",
+            executable_path);
    }

    if (!hasLinuxCapability(CAP_SYS_NICE))
    {
        LOG_INFO(log, "It looks like the process has no CAP_SYS_NICE capability, the setting 'os_thread_nice' will have no effect."
            " It could happen due to incorrect ClickHouse package installation."
-            " You could resolve the problem manually with 'sudo setcap cap_sys_nice=+ep " << executable_path << "'."
-            " Note that it will not work on 'nosuid' mounted filesystems.");
+            " You could resolve the problem manually with 'sudo setcap cap_sys_nice=+ep {}'."
+            " Note that it will not work on 'nosuid' mounted filesystems.",
+            executable_path);
    }
 #else
    LOG_INFO(log, "TaskStats is not implemented for this OS. IO accounting will be disabled.");
@ -746,11 +741,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
 #endif
                    )
                {
-                    LOG_ERROR(log,
-                        "Cannot resolve listen_host (" << host << "), error " << e.code() << ": " << e.message() << ". "
+                    LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. "
                        "If it is an IPv6 address and your host has disabled IPv6, then consider to "
                        "specify IPv4 address to listen in <listen_host> element of configuration "
-                        "file. Example: <listen_host>0.0.0.0</listen_host>");
+                        "file. Example: <listen_host>0.0.0.0</listen_host>",
+                        host, e.code(), e.message());
                }

                throw;
@ -802,11 +797,11 @@ int Server::main(const std::vector<std::string> & /*args*/)

                    if (listen_try)
                    {
-                        LOG_ERROR(log, message
-                            << ". If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to "
+                        LOG_ERROR(log, "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to "
                            "specify not disabled IPv4 or IPv6 address to listen in <listen_host> element of configuration "
                            "file. Example for disabled IPv6: <listen_host>0.0.0.0</listen_host> ."
-                            " Example for disabled IPv4: <listen_host>::</listen_host>");
+                            " Example for disabled IPv4: <listen_host>::</listen_host>",
+                            message);
                    }
                    else
                    {
@ -826,7 +821,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
                    createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));

-                LOG_INFO(log, "Listening for http://" + address.toString());
+                LOG_INFO(log, "Listening for http://{}", address.toString());
            });

            /// HTTPS
@ -840,7 +835,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
                    createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));

-                LOG_INFO(log, "Listening for https://" + address.toString());
+                LOG_INFO(log, "Listening for https://{}", address.toString());
 #else
                UNUSED(port);
                throw Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.",
@ -861,7 +856,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                    socket,
                    new Poco::Net::TCPServerParams));

-                LOG_INFO(log, "Listening for connections with native protocol (tcp): " + address.toString());
+                LOG_INFO(log, "Listening for connections with native protocol (tcp): {}", address.toString());
            });

            /// TCP with SSL
@ -877,7 +872,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                    server_pool,
                    socket,
                    new Poco::Net::TCPServerParams));
-                LOG_INFO(log, "Listening for connections with secure native protocol (tcp_secure): " + address.toString());
+                LOG_INFO(log, "Listening for connections with secure native protocol (tcp_secure): {}", address.toString());
 #else
                UNUSED(port);
                throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
@ -895,7 +890,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
                    createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, http_params));

-                LOG_INFO(log, "Listening for replica communication (interserver): http://" + address.toString());
+                LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString());
            });

            create_server("interserver_https_port", [&](UInt16 port)
@ -908,7 +903,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
                    createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, http_params));

-                LOG_INFO(log, "Listening for secure replica communication (interserver): https://" + address.toString());
+                LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString());
 #else
                UNUSED(port);
                throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
@ -928,7 +923,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                    socket,
                    new Poco::Net::TCPServerParams));

-                LOG_INFO(log, "Listening for MySQL compatibility protocol: " + address.toString());
+                LOG_INFO(log, "Listening for MySQL compatibility protocol: {}", address.toString());
            });

            /// Prometheus (if defined and not setup yet with http_port)
@ -941,7 +936,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
                    createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));

-                LOG_INFO(log, "Listening for Prometheus: http://" + address.toString());
+                LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString());
            });
        }

@ -966,12 +961,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
            dns_cache_updater->start();

        {
-            std::stringstream message;
-            message << "Available RAM: " << formatReadableSizeWithBinarySuffix(memory_amount) << ";"
-                << " physical cores: " << getNumberOfPhysicalCPUCores() << ";"
-                // on ARM processors it can show only enabled at current moment cores
-                << " logical cores: " << std::thread::hardware_concurrency() << ".";
-            LOG_INFO(log, message.str());
+            LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",
+                formatReadableSizeWithBinarySuffix(memory_amount),
+                getNumberOfPhysicalCPUCores(),  // on ARM processors it can show only enabled at current moment cores
+                std::thread::hardware_concurrency());
        }

        LOG_INFO(log, "Ready for connections.");
@ -989,9 +982,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
                current_connections += server->currentConnections();
            }

-            LOG_INFO(log,
-                "Closed all listening sockets."
-                    << (current_connections ? " Waiting for " + toString(current_connections) + " outstanding connections." : ""));
+            if (current_connections)
+                LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
+            else
+                LOG_INFO(log, "Closed all listening sockets.");

            /// Killing remaining queries.
            global_context->getProcessList().killAllQueries();
@ -1013,9 +1007,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
                }
            }

-            LOG_INFO(
-                log, "Closed connections." << (current_connections ? " But " + toString(current_connections) + " remains."
-                    " Tip: To increase wait time add to config: <shutdown_wait_unfinished>60</shutdown_wait_unfinished>" : ""));
+            if (current_connections)
+                LOG_INFO(log, "Closed connections. But {} remain."
+                    " Tip: To increase wait time add to config: <shutdown_wait_unfinished>60</shutdown_wait_unfinished>", current_connections);
+            else
+                LOG_INFO(log, "Closed connections.");

            dns_cache_updater.reset();
            main_config_reloader.reset();
--- a/programs/server/TCPHandler.cpp
+++ b/programs/server/TCPHandler.cpp
@ -28,7 +28,7 @@
 #include <Compression/CompressionFactory.h>
 #include <common/logger_useful.h>

-#include <Processors/Executors/PullingPipelineExecutor.h>
+#include <Processors/Executors/PullingAsyncPipelineExecutor.h>

 #include "TCPHandler.h"

@ -115,8 +115,7 @@ void TCPHandler::runImpl()
        if (!DatabaseCatalog::instance().isDatabaseExist(default_database))
        {
            Exception e("Database " + backQuote(default_database) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
-            LOG_ERROR(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText()
-                << ", Stack trace:\n\n" << e.getStackTraceString());
+            LOG_ERROR(log, "Code: {}, e.displayText() = {}, Stack trace:\n\n{}", e.code(), e.displayText(), e.getStackTraceString());
            sendException(e, connection_context.getSettingsRef().calculate_text_stack_trace);
            return;
        }
@ -278,8 +277,11 @@ void TCPHandler::runImpl()
            sendLogs();
            sendEndOfStream();

-            query_scope.reset();
+            /// QueryState should be cleared before QueryScope, since otherwise
+            /// the MemoryTracker will be wrong for possible deallocations.
+            /// (i.e. deallocations from the Aggregator with two-level aggregation)
            state.reset();
+            query_scope.reset();
        }
        catch (const Exception & e)
        {
@ -359,8 +361,11 @@ void TCPHandler::runImpl()

        try
        {
-            query_scope.reset();
+            /// QueryState should be cleared before QueryScope, since otherwise
+            /// the MemoryTracker will be wrong for possible deallocations.
+            /// (i.e. deallocations from the Aggregator with two-level aggregation)
            state.reset();
+            query_scope.reset();
        }
        catch (...)
        {
@ -373,8 +378,7 @@ void TCPHandler::runImpl()

        watch.stop();

-        LOG_INFO(log, std::fixed << std::setprecision(3)
-            << "Processed in " << watch.elapsedSeconds() << " sec.");
+        LOG_INFO(log, "Processed in {} sec.", watch.elapsedSeconds());

        /// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query.
        query_context.reset();
@ -560,7 +564,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
    }

    {
-        PullingPipelineExecutor executor(pipeline);
+        PullingAsyncPipelineExecutor executor(pipeline);
        CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread};

        Block block;
@ -726,14 +730,12 @@ void TCPHandler::receiveHello()
    readStringBinary(user, *in);
    readStringBinary(password, *in);

-    LOG_DEBUG(log, "Connected " << client_name
-        << " version " << client_version_major
-        << "." << client_version_minor
-        << "." << client_version_patch
-        << ", revision: " << client_revision
-        << (!default_database.empty() ? ", database: " + default_database : "")
-        << (!user.empty() ? ", user: " + user : "")
-        << ".");
+    LOG_DEBUG(log, "Connected {} version {}.{}.{}, revision: {}{}{}.",
+        client_name,
+        client_version_major, client_version_minor, client_version_patch,
+        client_revision,
+        (!default_database.empty() ? ", database: " + default_database : ""),
+        (!user.empty() ? ", user: " + user : ""));

    connection_context.setUser(user, password, socket().peerAddress());
 }
@ -1199,8 +1201,7 @@ void TCPHandler::run()
        /// Timeout - not an error.
        if (!strcmp(e.what(), "Timeout"))
        {
-            LOG_DEBUG(log, "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
-                << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what());
+            LOG_DEBUG(log, "Poco::Exception. Code: {}, e.code() = {}, e.displayText() = {}, e.what() = {}", ErrorCodes::POCO_EXCEPTION, e.code(), e.displayText(), e.what());
        }
        else
            throw;
--- a/programs/server/TCPHandlerFactory.h
+++ b/programs/server/TCPHandlerFactory.h
@ -35,7 +35,7 @@ public:
    {
        try
        {
-            LOG_TRACE(log, "TCP Request. Address: " << socket.peerAddress().toString());
+            LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
            return new TCPHandler(server, socket);
        }
        catch (const Poco::Net::NetException &)
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -227,7 +227,7 @@
          and to prevent clickhouse executable from being paged out under high IO load.
         Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
    -->
-    <mlock_executable>false</mlock_executable>
+    <mlock_executable>true</mlock_executable>

    <!-- Configuration of clusters that could be used in Distributed tables.
         https://clickhouse.tech/docs/en/operations/table_engines/distributed/
@ -405,6 +405,9 @@
    </prometheus>
    -->

+    <!-- Lazy system.*_log table creation -->
+    <!-- <system_tables_lazy_load>false</system_tables_lazy_load> -->
+
    <!-- Query log. Used only for queries with setting log_queries = 1. -->
    <query_log>
        <!-- What table to insert data. If table is not exist, it will be created.
--- a/src/Access/AccessRights.cpp
+++ b/src/Access/AccessRights.cpp
@ -251,12 +251,11 @@ public:

    void logTree(Poco::Logger * log) const
    {
-        LOG_TRACE(log, "Tree(" << level << "): name=" << (node_name ? *node_name : "NULL")
-                  << ", access=" << access.toString()
-                  << ", final_access=" << final_access.toString()
-                  << ", min_access=" << min_access.toString()
-                  << ", max_access=" << max_access.toString()
-                  << ", num_children=" << (children ? children->size() : 0));
+        LOG_TRACE(log, "Tree({}): name={}, access={}, final_access={}, min_access={}, max_access={}, num_children={}",
+            level, node_name ? *node_name : "NULL", access.toString(),
+            final_access.toString(), min_access.toString(), max_access.toString(),
+            (children ? children->size() : 0));
+
        if (children)
        {
            for (auto & child : *children | boost::adaptors::map_values)
--- a/src/Access/AllowedClientHosts.cpp
+++ b/src/Access/AllowedClientHosts.cpp
@ -310,8 +310,8 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const
            /// Try to ignore DNS errors: if host cannot be resolved, skip it and try next.
            LOG_WARNING(
                &Logger::get("AddressPatterns"),
-                "Failed to check if the allowed client hosts contain address " << client_address.toString() << ". " << e.displayText()
-                                                                               << ", code = " << e.code());
+                "Failed to check if the allowed client hosts contain address {}. {}, code = {}",
+                client_address.toString(), e.displayText(), e.code());
            return false;
        }
    };
@ -343,8 +343,8 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const
            /// Try to ignore DNS errors: if host cannot be resolved, skip it and try next.
            LOG_WARNING(
                &Logger::get("AddressPatterns"),
-                "Failed to check if the allowed client hosts contain address " << client_address.toString() << ". " << e.displayText()
-                                                                             << ", code = " << e.code());
+                "Failed to check if the allowed client hosts contain address {}. {}, code = {}",
+                client_address.toString(), e.displayText(), e.code());
            return false;
        }
    };
--- a/src/Access/ContextAccess.cpp
+++ b/src/Access/ContextAccess.cpp
@ -200,7 +200,7 @@ bool ContextAccess::calculateResultAccessAndCheck(Poco::Logger * log_, const Acc
    bool is_granted = access->isGranted(flags, args...);

    if (trace_log)
-        LOG_TRACE(trace_log, "Access " << (is_granted ? "granted" : "denied") << ": " << (AccessRightsElement{flags, args...}.toString()));
+        LOG_TRACE(trace_log, "Access {}: {}", (is_granted ? "granted" : "denied"), (AccessRightsElement{flags, args...}.toString()));

    if (is_granted)
        return true;
@ -219,7 +219,7 @@ bool ContextAccess::calculateResultAccessAndCheck(Poco::Logger * log_, const Acc
        if constexpr (mode == THROW_IF_ACCESS_DENIED)
            throw Exception(user_name + ": " + msg, error_code);
        else if constexpr (mode == LOG_WARNING_IF_ACCESS_DENIED)
-            LOG_WARNING(log_, user_name + ": " + msg + formatSkippedMessage(args...));
+            LOG_WARNING(log_, "{}: {}{}", user_name, msg, formatSkippedMessage(args...));
    };

    if (!user)
@ -451,15 +451,18 @@ boost::shared_ptr<const AccessRights> ContextAccess::calculateResultAccess(bool

    if (trace_log && (params.readonly == readonly_) && (params.allow_ddl == allow_ddl_) && (params.allow_introspection == allow_introspection_))
    {
-        LOG_TRACE(trace_log, "List of all grants: " << merged_access->toString() << (grant_option ? " WITH GRANT OPTION" : ""));
+        if (grant_option)
+            LOG_TRACE(trace_log, "List of all grants: {} WITH GRANT OPTION", merged_access->toString());
+        else
+            LOG_TRACE(trace_log, "List of all grants: {}", merged_access->toString());
+
        if (roles_info && !roles_info->getCurrentRolesNames().empty())
        {
-            LOG_TRACE(
-                trace_log,
-                "Current_roles: " << boost::algorithm::join(roles_info->getCurrentRolesNames(), ", ")
-                                  << ", enabled_roles: " << boost::algorithm::join(roles_info->getEnabledRolesNames(), ", "));
+            LOG_TRACE(trace_log, "Current_roles: {}, enabled_roles: {}",
+                boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "),
+                boost::algorithm::join(roles_info->getEnabledRolesNames(), ", "));
        }
-        LOG_TRACE(trace_log, "Settings: readonly=" << readonly_ << ", allow_ddl=" << allow_ddl_ << ", allow_introspection_functions=" << allow_introspection_);
+        LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", readonly_, allow_ddl_, allow_introspection_);
    }

    res = std::move(merged_access);
--- a/src/Access/DiskAccessStorage.cpp
+++ b/src/Access/DiskAccessStorage.cpp
@ -367,7 +367,7 @@ bool DiskAccessStorage::readLists()
        auto file_path = getListFilePath(directory_path, type);
        if (!std::filesystem::exists(file_path))
        {
-            LOG_WARNING(getLogger(), "File " + file_path.string() + " doesn't exist");
+            LOG_WARNING(getLogger(), "File {} doesn't exist", file_path.string());
            ok = false;
            break;
        }
@ -496,7 +496,7 @@ void DiskAccessStorage::listsWritingThreadFunc()
 /// and then saves the files "users.list", "roles.list", etc. to the same directory.
 bool DiskAccessStorage::rebuildLists()
 {
-    LOG_WARNING(getLogger(), "Recovering lists in directory " + directory_path);
+    LOG_WARNING(getLogger(), "Recovering lists in directory {}", directory_path);
    clear();

    for (const auto & directory_entry : std::filesystem::directory_iterator(directory_path))
--- a/src/AggregateFunctions/AggregateFunctionNull.h
+++ b/src/AggregateFunctions/AggregateFunctionNull.h
@ -4,6 +4,7 @@
 #include <AggregateFunctions/IAggregateFunction.h>
 #include <Columns/ColumnNullable.h>
 #include <Common/assert_cast.h>
+#include <Columns/ColumnsCommon.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
@ -53,13 +54,13 @@ protected:

    static void initFlag(AggregateDataPtr place) noexcept
    {
-        if (result_is_nullable)
+        if constexpr (result_is_nullable)
            place[0] = 0;
    }

    static void setFlag(AggregateDataPtr place) noexcept
    {
-        if (result_is_nullable)
+        if constexpr (result_is_nullable)
            place[0] = 1;
    }

@ -72,7 +73,7 @@ public:
    AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
        : IAggregateFunctionHelper<Derived>(arguments, params), nested_function{nested_function_}
    {
-        if (result_is_nullable)
+        if constexpr (result_is_nullable)
            prefix_size = nested_function->alignOfData();
        else
            prefix_size = 0;
@ -128,7 +129,7 @@ public:
    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
        bool flag = getFlag(place);
-        if (result_is_nullable)
+        if constexpr (result_is_nullable)
            writeBinary(flag, buf);
        if (flag)
            nested_function->serialize(nestedPlace(place), buf);
@ -137,7 +138,7 @@ public:
    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
    {
        bool flag = 1;
-        if (result_is_nullable)
+        if constexpr (result_is_nullable)
            readBinary(flag, buf);
        if (flag)
        {
@ -148,7 +149,7 @@ public:

    void insertResultInto(AggregateDataPtr place, IColumn & to) const override
    {
-        if (result_is_nullable)
+        if constexpr (result_is_nullable)
        {
            ColumnNullable & to_concrete = assert_cast<ColumnNullable &>(to);
            if (getFlag(place))
@ -194,13 +195,26 @@ public:
    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
+        const IColumn * nested_column = &column->getNestedColumn();
        if (!column->isNullAt(row_num))
        {
            this->setFlag(place);
-            const IColumn * nested_column = &column->getNestedColumn();
            this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
        }
    }
+
+    void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override  /// Aggregates batch_size rows of the nullable column columns[0] into a single state.
+    {
+        const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
+        const IColumn * nested_column = &column->getNestedColumn();
+        const UInt8 * null_map = column->getNullMapData().data();
+        /// Hand the whole batch to the nested function together with the null map; the nested state lives at nestedPlace(place).
+        this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), &nested_column, null_map, arena);
+        /// NOTE(review): memoryIsByte(null_map, batch_size, 1) presumably reports that every row is NULL; the flag is set only when it returns false - confirm in Columns/ColumnsCommon.h.
+        if constexpr (result_is_nullable)
+            if (!memoryIsByte(null_map, batch_size, 1))
+                this->setFlag(place);
+    }
 };


--- a/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/src/AggregateFunctions/AggregateFunctionSum.h
@ -20,11 +20,72 @@ struct AggregateFunctionSumData
 {
    T sum{};

-    void add(T value)
+    void ALWAYS_INLINE add(T value)
    {
        sum += value;
    }

+    /// Vectorized version: adds `count` values starting at `ptr` to `sum`, accumulating them in a different order than repeated add() calls would.
+    template <typename Value>
+    void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
+    {
+        /// Compiler cannot unroll this loop, do it manually.
+        /// (at least for floats, most likely due to the lack of -fassociative-math)
+
+        /// Something around the number of SSE registers * the number of elements that fit in a register.
+        constexpr size_t unroll_count = 128 / sizeof(T);
+        T partial_sums[unroll_count]{};
+        /// unrolled_end is the furthest position reachable from ptr in whole steps of unroll_count elements.
+        const auto * end = ptr + count;
+        const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
+        /// Main loop: accumulator i receives every unroll_count-th value, so the additions are independent of each other.
+        while (ptr < unrolled_end)
+        {
+            for (size_t i = 0; i < unroll_count; ++i)
+                partial_sums[i] += ptr[i];
+            ptr += unroll_count;
+        }
+        /// Fold the independent accumulators into the total.
+        for (size_t i = 0; i < unroll_count; ++i)
+            sum += partial_sums[i];
+        /// Tail: the remaining count % unroll_count values are added one by one.
+        while (ptr < end)
+        {
+            sum += *ptr;
+            ++ptr;
+        }
+    }
+
+    template <typename Value>  /// Same as addMany, but a value is skipped when its entry in null_map is non-zero.
+    void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
+    {
+        constexpr size_t unroll_count = 128 / sizeof(T);
+        T partial_sums[unroll_count]{};
+        /// unrolled_end is the furthest position reachable from ptr in whole steps of unroll_count elements.
+        const auto * end = ptr + count;
+        const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
+        /// Main loop: ptr and null_map advance together, so null_map[i] always describes ptr[i].
+        while (ptr < unrolled_end)
+        {
+            for (size_t i = 0; i < unroll_count; ++i)
+                if (!null_map[i])
+                    partial_sums[i] += ptr[i];
+            ptr += unroll_count;
+            null_map += unroll_count;
+        }
+        /// Fold the independent accumulators into the total.
+        for (size_t i = 0; i < unroll_count; ++i)
+            sum += partial_sums[i];
+        /// Tail: the remaining count % unroll_count values, checked against the null map one by one.
+        while (ptr < end)
+        {
+            if (!*null_map)
+                sum += *ptr;
+            ++ptr;
+            ++null_map;
+        }
+    }
+
    void merge(const AggregateFunctionSumData & rhs)
    {
        sum += rhs.sum;
@ -55,21 +116,95 @@ struct AggregateFunctionSumKahanData
    T sum{};
    T compensation{};

-    void add(T value)
+    template <typename Value>
+    void ALWAYS_INLINE addImpl(Value value, T & out_sum, T & out_compensation)
    {
-        auto compensated_value = value - compensation;
-        auto new_sum = sum + compensated_value;
-        compensation = (new_sum - sum) - compensated_value;
-        sum = new_sum;
+        auto compensated_value = value - out_compensation;
+        auto new_sum = out_sum + compensated_value;
+        out_compensation = (new_sum - out_sum) - compensated_value;
+        out_sum = new_sum;
+    }
+
+    void ALWAYS_INLINE add(T value)  /// Adds one value to this state's own (sum, compensation) pair.
+    {
+        addImpl(value, sum, compensation);
+    }
+
+    /// Vectorized version: adds `count` values starting at `ptr`, keeping a separate (sum, compensation) pair per unrolled lane.
+    template <typename Value>
+    void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
+    {
+        /// Less than in ordinary sum, because the algorithm is more complicated and too large loop unrolling is questionable.
+        /// But this is just a guess.
+        constexpr size_t unroll_count = 4;
+        T partial_sums[unroll_count]{};
+        T partial_compensations[unroll_count]{};
+        /// unrolled_end is the furthest position reachable from ptr in whole steps of unroll_count elements.
+        const auto * end = ptr + count;
+        const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
+        /// Main loop: lane i accumulates every unroll_count-th value into its own pair.
+        while (ptr < unrolled_end)
+        {
+            for (size_t i = 0; i < unroll_count; ++i)
+                addImpl(ptr[i], partial_sums[i], partial_compensations[i]);
+            ptr += unroll_count;
+        }
+        /// Merge every lane into this state's (sum, compensation).
+        for (size_t i = 0; i < unroll_count; ++i)
+            mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);
+        /// Tail: the remaining count % unroll_count values go directly into this state's pair.
+        while (ptr < end)
+        {
+            addImpl(*ptr, sum, compensation);
+            ++ptr;
+        }
+    }
+
+    template <typename Value>  /// Same as addMany, but a value is skipped when its entry in null_map is non-zero.
+    void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
+    {
+        constexpr size_t unroll_count = 4;
+        T partial_sums[unroll_count]{};
+        T partial_compensations[unroll_count]{};
+        /// unrolled_end is the furthest position reachable from ptr in whole steps of unroll_count elements.
+        const auto * end = ptr + count;
+        const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
+        /// Main loop: ptr and null_map advance together, so null_map[i] always describes ptr[i].
+        while (ptr < unrolled_end)
+        {
+            for (size_t i = 0; i < unroll_count; ++i)
+                if (!null_map[i])
+                    addImpl(ptr[i], partial_sums[i], partial_compensations[i]);
+            ptr += unroll_count;
+            null_map += unroll_count;
+        }
+        /// Merge every lane into this state's (sum, compensation).
+        for (size_t i = 0; i < unroll_count; ++i)
+            mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);
+        /// Tail: the remaining count % unroll_count values, checked against the null map one by one.
+        while (ptr < end)
+        {
+            if (!*null_map)
+                addImpl(*ptr, sum, compensation);
+            ++ptr;
+            ++null_map;
+        }
+    }
+
+    void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation)  /// Folds the pair (from_sum, from_compensation) into (to_sum, to_compensation), updating both destination references.
+    {
+        auto raw_sum = to_sum + from_sum;
+        auto rhs_compensated = raw_sum - to_sum;
+        /// Kahan summation is tricky because it depends on non-associativity of float arithmetic.
+        /// Do not simplify this expression if you are not sure. It reads the destination's compensation through the parameter, not the member.
+        auto compensations = ((from_sum - rhs_compensated) + (to_sum - (raw_sum - rhs_compensated))) + to_compensation + from_compensation;
+        to_sum = raw_sum + compensations;
+        to_compensation = compensations - (to_sum - raw_sum);
+    }

    void merge(const AggregateFunctionSumKahanData & rhs)
    {
-        auto raw_sum = sum + rhs.sum;
-        auto rhs_compensated = raw_sum - sum;
-        auto compensations = ((rhs.sum - rhs_compensated) + (sum - (raw_sum - rhs_compensated))) + compensation + rhs.compensation;
-        sum = raw_sum + compensations;
-        compensation = compensations - (sum - raw_sum);
+        mergeImpl(sum, compensation, rhs.sum, rhs.compensation);
    }

    void write(WriteBuffer & buf) const
@ -141,6 +276,20 @@ public:
        this->data(place).add(column.getData()[row_num]);
    }

+    /// Vectorized version for when there are no GROUP BY keys: feeds the first batch_size values of columns[0] to the state's addMany.
+    void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *) const override
+    {
+        const auto & column = static_cast<const ColVecType &>(*columns[0]);
+        this->data(place).addMany(column.getData().data(), batch_size);
+    }
+
+    void addBatchSinglePlaceNotNull(  /// Same as addBatchSinglePlace, but forwards null_map so that the state's addManyNotNull can skip rows.
+        size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *) const override
+    {
+        const auto & column = static_cast<const ColVecType &>(*columns[0]);
+        this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size);
+    }
+
    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@ -145,6 +145,11 @@ public:
      */
    virtual void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;

+    /** The same for a single place, when only the rows not filtered out by `null_map` need to be aggregated.
+      */
+    virtual void addBatchSinglePlaceNotNull(
+        size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0;
+
    /** In addition to addBatch, this method collects multiple rows of arguments into array "places"
      *  as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
      *  -Array combinator. It might also be used generally to break data dependency when array
@ -201,6 +206,14 @@ public:
            static_cast<const Derived *>(this)->add(place, columns, i, arena);
    }

+    void addBatchSinglePlaceNotNull(  /// Default implementation: calls Derived::add for every row of the batch whose null_map entry is zero.
+        size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const override
+    {
+        for (size_t i = 0; i < batch_size; ++i)
+            if (!null_map[i])
+                static_cast<const Derived *>(this)->add(place, columns, i, arena);
+    }
+
    void addBatchArray(
        size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena)
        const override
--- a/src/AggregateFunctions/QuantileTDigest.h
+++ b/src/AggregateFunctions/QuantileTDigest.h
@ -103,6 +103,7 @@ class QuantileTDigest
    struct RadixSortTraits
    {
        using Element = Centroid;
+        using Result = Element;
        using Key = Value;
        using CountType = UInt32;
        using KeyBits = UInt32;
@ -114,6 +115,7 @@ class QuantileTDigest

        /// The function to get the key from an array element.
        static Key & extractKey(Element & elem) { return elem.mean; }
+        static Result & extractResult(Element & elem) { return elem; }
    };

    /** Adds a centroid `c` to the digest
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@ -61,11 +61,11 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
        if (connected)
            disconnect();

-        LOG_TRACE(log_wrapper.get(), "Connecting. Database: "
-            << (default_database.empty() ? "(not specified)" : default_database)
-            << ". User: " << user
-            << (static_cast<bool>(secure) ? ". Secure" : "")
-            << (static_cast<bool>(compression) ? "" : ". Uncompressed"));
+        LOG_TRACE(log_wrapper.get(), "Connecting. Database: {}. User: {}{}{}",
+            default_database.empty() ? "(not specified)" : default_database,
+            user,
+            static_cast<bool>(secure) ? ". Secure" : "",
+            static_cast<bool>(compression) ? "" : ". Uncompressed");

        if (static_cast<bool>(secure))
        {
@ -107,11 +107,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
        sendHello();
        receiveHello();

-        LOG_TRACE(log_wrapper.get(), "Connected to " << server_name
-            << " server version " << server_version_major
-            << "." << server_version_minor
-            << "." << server_version_patch
-            << ".");
+        LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.",
+            server_name, server_version_major, server_version_minor, server_version_patch);
    }
    catch (Poco::Net::NetException & e)
    {
@ -132,8 +129,6 @@ void Connection::connect(const ConnectionTimeouts & timeouts)

 void Connection::disconnect()
 {
-    //LOG_TRACE(log_wrapper.get(), "Disconnecting");
-
    in = nullptr;
    last_input_packet_type.reset();
    out = nullptr; // can write to socket
@ -186,8 +181,6 @@ void Connection::sendHello()

 void Connection::receiveHello()
 {
-    //LOG_TRACE(log_wrapper.get(), "Receiving hello");
-
    /// Receive hello packet.
    UInt64 packet_type = 0;

@ -391,8 +384,6 @@ void Connection::sendQuery(

    query_id = query_id_;

-    //LOG_TRACE(log_wrapper.get(), "Sending query");
-
    writeVarUInt(Protocol::Client::Query, *out);
    writeStringBinary(query_id, *out);

@ -441,8 +432,6 @@ void Connection::sendCancel()
    if (!out)
        return;

-    //LOG_TRACE(log_wrapper.get(), "Sending cancel");
-
    writeVarUInt(Protocol::Client::Cancel, *out);
    out->next();
 }
@ -450,8 +439,6 @@ void Connection::sendCancel()

 void Connection::sendData(const Block & block, const String & name, bool scalar)
 {
-    //LOG_TRACE(log_wrapper.get(), "Sending data");
-
    if (!block_out)
    {
        if (compression == Protocol::Compression::Enable)
@ -516,19 +503,23 @@ void Connection::sendScalarsData(Scalars & data)
    maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes;
    double elapsed = watch.elapsedSeconds();

-    std::stringstream msg;
-    msg << std::fixed << std::setprecision(3);
-    msg << "Sent data for " << data.size() << " scalars, total " << rows << " rows in " << elapsed << " sec., "
-        << static_cast<size_t>(rows / watch.elapsedSeconds()) << " rows/sec., "
-        << maybe_compressed_out_bytes / 1048576.0 << " MiB (" << maybe_compressed_out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
-
    if (compression == Protocol::Compression::Enable)
-        msg << ", compressed " << static_cast<double>(maybe_compressed_out_bytes) / out_bytes << " times to "
-            << out_bytes / 1048576.0 << " MiB (" << out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
+        LOG_DEBUG(log_wrapper.get(),
+            "Sent data for {} scalars, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), compressed {} times to {} ({}/sec.)",
+            data.size(), rows, elapsed,
+            static_cast<size_t>(rows / watch.elapsedSeconds()),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()),
+            static_cast<double>(maybe_compressed_out_bytes) / out_bytes,
+            formatReadableSizeWithBinarySuffix(out_bytes),
+            formatReadableSizeWithBinarySuffix(out_bytes / watch.elapsedSeconds()));
    else
-        msg << ", no compression.";
-
-    LOG_DEBUG(log_wrapper.get(), msg.rdbuf());
+        LOG_DEBUG(log_wrapper.get(),
+            "Sent data for {} scalars, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), no compression.",
+            data.size(), rows, elapsed,
+            static_cast<size_t>(rows / watch.elapsedSeconds()),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()));
 }

 namespace
@ -616,19 +607,23 @@ void Connection::sendExternalTablesData(ExternalTablesData & data)
    maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes;
    double elapsed = watch.elapsedSeconds();

-    std::stringstream msg;
-    msg << std::fixed << std::setprecision(3);
-    msg << "Sent data for " << data.size() << " external tables, total " << rows << " rows in " << elapsed << " sec., "
-        << static_cast<size_t>(rows / watch.elapsedSeconds()) << " rows/sec., "
-        << maybe_compressed_out_bytes / 1048576.0 << " MiB (" << maybe_compressed_out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
-
    if (compression == Protocol::Compression::Enable)
-        msg << ", compressed " << static_cast<double>(maybe_compressed_out_bytes) / out_bytes << " times to "
-            << out_bytes / 1048576.0 << " MiB (" << out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
+        LOG_DEBUG(log_wrapper.get(),
+            "Sent data for {} external tables, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), compressed {} times to {} ({}/sec.)",
+            data.size(), rows, elapsed,
+            static_cast<size_t>(rows / watch.elapsedSeconds()),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()),
+            static_cast<double>(maybe_compressed_out_bytes) / out_bytes,
+            formatReadableSizeWithBinarySuffix(out_bytes),
+            formatReadableSizeWithBinarySuffix(out_bytes / watch.elapsedSeconds()));
    else
-        msg << ", no compression.";
-
-    LOG_DEBUG(log_wrapper.get(), msg.rdbuf());
+        LOG_DEBUG(log_wrapper.get(),
+            "Sent data for {} external tables, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), no compression.",
+            data.size(), rows, elapsed,
+            static_cast<size_t>(rows / watch.elapsedSeconds()),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
+            formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()));
 }

 std::optional<Poco::Net::SocketAddress> Connection::getResolvedAddress() const
@ -682,12 +677,9 @@ Packet Connection::receivePacket()
        }
        else
        {
-            //LOG_TRACE(log_wrapper.get(), "Receiving packet type");
            readVarUInt(res.type, *in);
        }

-        //LOG_TRACE(log_wrapper.get(), "Receiving packet " << res.type << " " << Protocol::Server::toString(res.type));
-        //std::cerr << "Client got packet: " << Protocol::Server::toString(res.type) << "\n";
        switch (res.type)
        {
            case Protocol::Server::Data: [[fallthrough]];
@ -740,8 +732,6 @@ Packet Connection::receivePacket()

 Block Connection::receiveData()
 {
-    //LOG_TRACE(log_wrapper.get(), "Receiving data");
-
    initBlockInput();
    return receiveDataImpl(block_in);
 }
@ -820,8 +810,6 @@ void Connection::setDescription()

 std::unique_ptr<Exception> Connection::receiveException()
 {
-    //LOG_TRACE(log_wrapper.get(), "Receiving exception");
-
    return std::make_unique<Exception>(readException(*in, "Received from " + getDescription()));
 }

@ -838,8 +826,6 @@ std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)

 Progress Connection::receiveProgress()
 {
-    //LOG_TRACE(log_wrapper.get(), "Receiving progress");
-
    Progress progress;
    progress.read(*in, server_revision);
    return progress;
--- a/src/Client/Connection.h
+++ b/src/Client/Connection.h
@ -50,6 +50,8 @@ class Connection;
 using ConnectionPtr = std::shared_ptr<Connection>;
 using Connections = std::vector<ConnectionPtr>;

+using Scalars = std::map<String, Block>;
+

 /// Packet that could be received from server.
 struct Packet
--- a/src/Client/ConnectionPoolWithFailover.cpp
+++ b/src/Client/ConnectionPoolWithFailover.cpp
@ -222,8 +222,8 @@ ConnectionPoolWithFailover::tryGetEntry(
        auto table_status_it = status_response.table_states_by_id.find(*table_to_check);
        if (table_status_it == status_response.table_states_by_id.end())
        {
-            fail_message = "There is no table " + table_to_check->database + "." + table_to_check->table
-                + " on server: " + result.entry->getDescription();
+            const char * message_pattern = "There is no table {}.{} on server: {}";
+            fail_message = fmt::format(message_pattern, backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription());
            LOG_WARNING(log, fail_message);
            ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable);

@ -248,10 +248,7 @@ ConnectionPoolWithFailover::tryGetEntry(
            result.is_up_to_date = false;
            result.staleness = delay;

-            LOG_TRACE(
-                    log, "Server " << result.entry->getDescription() << " has unacceptable replica delay "
-                    << "for table " << table_to_check->database << "." << table_to_check->table
-                    << ": " << delay);
+            LOG_TRACE(log, "Server {} has unacceptable replica delay for table {}.{}: {}", result.entry->getDescription(), table_to_check->database, table_to_check->table, delay);
            ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica);
        }
    }
--- a/src/Client/TimeoutSetter.cpp
+++ b/src/Client/TimeoutSetter.cpp
@ -35,7 +35,7 @@ TimeoutSetter::~TimeoutSetter()
    catch (std::exception & e)
    {
        // Sometimes catched on macos
-        LOG_ERROR(&Logger::get("Client"), std::string{"TimeoutSetter: Can't reset timeouts: "} + e.what());
+        LOG_ERROR(&Logger::get("Client"), "TimeoutSetter: Can't reset timeouts: {}", e.what());
    }
 }
 }
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@ -17,6 +17,7 @@
 #include <DataStreams/ColumnGathererStream.h>
 #include <ext/bit_cast.h>
 #include <pdqsort.h>
+#include <numeric>

 #if !defined(ARCADIA_BUILD)
 #    include <Common/config.h>
@ -117,7 +118,10 @@ namespace
    struct RadixSortTraits : RadixSortNumTraits<T>
    {
        using Element = ValueWithIndex<T>;
+        using Result = size_t;
+
        static T & extractKey(Element & elem) { return elem.value; }
+        static size_t extractResult(Element & elem) { return elem.index; }
    };
 }

@ -179,53 +183,27 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
                for (UInt32 i = 0; i < UInt32(s); ++i)
                    pairs[i] = {data[i], i};

-                RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s);
+                RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s, reverse, res.data());

                /// Radix sort treats all NaNs to be greater than all numbers.
                /// If the user needs the opposite, we must move them accordingly.
-                size_t nans_to_move = 0;
                if (std::is_floating_point_v<T> && nan_direction_hint < 0)
                {
-                    for (ssize_t i = s - 1; i >= 0; --i)
+                    size_t nans_to_move = 0;
+
+                    for (size_t i = 0; i < s; ++i)
                    {
-                        if (isNaN(pairs[i].value))
+                        if (isNaN(data[res[reverse ? i : s - 1 - i]]))
                            ++nans_to_move;
                        else
                            break;
                    }
-                }

-                if (reverse)
-                {
                    if (nans_to_move)
                    {
-                        for (size_t i = 0; i < s - nans_to_move; ++i)
-                            res[i] = pairs[s - nans_to_move - 1 - i].index;
-                        for (size_t i = s - nans_to_move; i < s; ++i)
-                            res[i] = pairs[s - 1 - (i - (s - nans_to_move))].index;
-                    }
-                    else
-                    {
-                        for (size_t i = 0; i < s; ++i)
-                            res[s - 1 - i] = pairs[i].index;
+                        std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : s - nans_to_move), std::end(res));
                    }
                }
-                else
-                {
-                    if (nans_to_move)
-                    {
-                        for (size_t i = 0; i < nans_to_move; ++i)
-                            res[i] = pairs[i + s - nans_to_move].index;
-                        for (size_t i = nans_to_move; i < s; ++i)
-                            res[i] = pairs[i - nans_to_move].index;
-                    }
-                    else
-                    {
-                        for (size_t i = 0; i < s; ++i)
-                            res[i] = pairs[i].index;
-                    }
-                }
-
                return;
            }
        }
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@ -5,6 +5,7 @@
 #include <cstdlib>
 #include <cstring>
 #include <algorithm>
+#include <sstream>
 #include <iostream>
 #include <functional>
 #include <Poco/DOM/Text.h>
@ -303,7 +304,7 @@ void ConfigProcessor::doIncludesRecursive(
            else if (throw_on_bad_incl)
                throw Poco::Exception(error_msg + name);
            else
-                LOG_WARNING(log, error_msg << name);
+                LOG_WARNING(log, "{}{}", error_msg, name);
        }
        else
        {
@ -440,7 +441,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
    zkutil::ZooKeeperNodeCache * zk_node_cache,
    const zkutil::EventPtr & zk_changed_event)
 {
-    LOG_DEBUG(log, "Processing configuration file '" + path + "'.");
+    LOG_DEBUG(log, "Processing configuration file '{}'.", path);

    XMLDocumentPtr config = dom_parser.parse(path);

@ -451,7 +452,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
    {
        try
        {
-            LOG_DEBUG(log, "Merging configuration file '" + merge_file + "'.");
+            LOG_DEBUG(log, "Merging configuration file '{}'.", merge_file);

            XMLDocumentPtr with = dom_parser.parse(merge_file);
            merge(config, with);
@ -488,7 +489,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
        }
        if (!include_from_path.empty())
        {
-            LOG_DEBUG(log, "Including configuration file '" + include_from_path + "'.");
+            LOG_DEBUG(log, "Including configuration file '{}'.", include_from_path);

            contributing_files.push_back(include_from_path);
            include_from = dom_parser.parse(include_from_path);
@ -568,10 +569,7 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
        if (!zk_exception)
            throw;

-        LOG_WARNING(
-                log,
-                "Error while processing from_zk config includes: " + zk_exception->message() +
-                ". Config will be loaded from preprocessed file: " + preprocessed_path);
+        LOG_WARNING(log, "Error while processing from_zk config includes: {}. Config will be loaded from preprocessed file: {}", zk_exception->message(), preprocessed_path);

        config_xml = dom_parser.parse(preprocessed_path);
    }
@ -619,11 +617,11 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
                Poco::File(preprocessed_path_parent).createDirectories();
        }
        DOMWriter().writeNode(preprocessed_path, loaded_config.preprocessed_xml);
-        LOG_DEBUG(log, "Saved preprocessed configuration to '" << preprocessed_path << "'.");
+        LOG_DEBUG(log, "Saved preprocessed configuration to '{}'.", preprocessed_path);
    }
    catch (Poco::Exception & e)
    {
-        LOG_WARNING(log, "Couldn't save preprocessed config to " << preprocessed_path << ": " << e.displayText());
+        LOG_WARNING(log, "Couldn't save preprocessed config to {}: {}", preprocessed_path, e.displayText());
    }
 }

--- a/src/Common/Config/ConfigReloader.cpp
+++ b/src/Common/Config/ConfigReloader.cpp
@ -87,7 +87,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac
        ConfigProcessor::LoadedConfig loaded_config;
        try
        {
-            LOG_DEBUG(log, "Loading config '" << path << "'");
+            LOG_DEBUG(log, "Loading config '{}'", path);

            loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true);
            if (loaded_config.has_zk_includes)
--- a/src/Common/DNSResolver.cpp
+++ b/src/Common/DNSResolver.cpp
@ -202,7 +202,7 @@ bool DNSResolver::updateCache()
    }

    if (!lost_hosts.empty())
-        LOG_INFO(&Logger::get("DNSResolver"), "Cached hosts not found: " << lost_hosts);
+        LOG_INFO(&Logger::get("DNSResolver"), "Cached hosts not found: {}", lost_hosts);

    return updated;
 }
--- a/src/Common/Exception.cpp
+++ b/src/Common/Exception.cpp
@ -36,7 +36,7 @@ Exception::Exception(const std::string & msg, int code)
 #ifndef NDEBUG
    if (code == ErrorCodes::LOGICAL_ERROR)
    {
-        LOG_ERROR(&Poco::Logger::root(), "Logical error: '" + msg + "'.");
+        LOG_ERROR(&Poco::Logger::root(), "Logical error: '{}'.", msg);
        assert(false);
    }
 #endif
@ -125,7 +125,10 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
 {
    try
    {
-        LOG_ERROR(logger, start_of_message << (start_of_message.empty() ? "" : ": ") << getCurrentExceptionMessage(true));
+        if (start_of_message.empty())
+            LOG_ERROR(logger, "{}", getCurrentExceptionMessage(true));
+        else
+            LOG_ERROR(logger, "{}: {}", start_of_message, getCurrentExceptionMessage(true));
    }
    catch (...)
    {
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`Subproject commit 297c3b2ed551a4989826fc8c4780bf533e964bd9`