Merge branch 'master' into merging_external_source_cassandra

Alexander Tokmakov 2020-05-22 17:27:40 +03:00
commit ac54d470da
244 changed files with 3047 additions and 1000 deletions

.arcignore Normal file

@ -0,0 +1,12 @@
# .arcignore is the same as .gitignore but for Arc VCS.
# Arc VCS is a proprietary VCS in Yandex that is very similar to Git
# from the user perspective but with the following differences:
# 1. Data is stored in distributed object storage.
# 2. Local copy works via FUSE without downloading all the objects.
# For this reason, it is better suited for huge monorepositories that can be found in large companies (e.g. Yandex, Google).
# As ClickHouse developers, we don't use Arc as a VCS (we use Git).
# But the ClickHouse source code is also mirrored into an internal monorepository, and our colleagues use Arc.
# You can read more about Arc here: https://habr.com/en/company/yandex/blog/482926/
# Repository is synchronized without 3rd-party submodules.
contrib


@ -9,7 +9,7 @@ Checks: '-*,
misc-unused-alias-decls,
misc-unused-parameters,
misc-unused-using-decls,
modernize-avoid-bind,
modernize-loop-convert,
modernize-make-shared,
@ -33,7 +33,7 @@ Checks: '-*,
performance-no-automatic-move,
performance-trivially-destructible,
performance-unnecessary-copy-initialization,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-size-empty,
@ -58,7 +58,7 @@ Checks: '-*,
readability-simplify-boolean-expr,
readability-inconsistent-declaration-parameter-name,
readability-identifier-naming,
bugprone-undelegated-constructor,
bugprone-argument-comment,
bugprone-bad-signal-to-kill-thread,
@ -102,7 +102,7 @@ Checks: '-*,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
cert-dcl21-cpp,
cert-dcl50-cpp,
cert-env33-c,
@ -112,7 +112,7 @@ Checks: '-*,
cert-mem57-cpp,
cert-msc50-cpp,
cert-oop58-cpp,
google-build-explicit-make-pair,
google-build-namespaces,
google-default-arguments,
@ -121,9 +121,9 @@ Checks: '-*,
google-readability-avoid-underscore-in-googletest-name,
google-runtime-int,
google-runtime-operator,
hicpp-exception-baseclass,
clang-analyzer-core.CallAndMessage,
clang-analyzer-core.DivideZero,
clang-analyzer-core.NonNullParamChecker,


@ -162,4 +162,10 @@ elseif (COMPILER_GCC)
add_cxx_compile_options(-Wunused)
# Warn if vector operation is not implemented via SIMD capabilities of the architecture
add_cxx_compile_options(-Wvector-operation-performance)
# XXX: gcc10 gets stuck with this option while compiling GatherUtils code
# (anyway there are builds with clang that will warn)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
add_cxx_compile_options(-Wno-sequence-point)
endif()
endif ()


@ -22,6 +22,15 @@ add_subdirectory (consistent-hashing)
add_subdirectory (croaring)
add_subdirectory (FastMemcpy)
add_subdirectory (grpc-cmake)
# ThreadPool selects jobs randomly, and there can be threads that performed
# some memory-heavy task before and will be inactive for some time; until
# they become active again, the memory is not freed, since by default each
# thread has its own arena, and there can be up to 4*CPU arenas (see the
# opt.narenas description).
#
# Enabling percpu_arena limits the number of arenas to the number of CPUs,
# so this problem should go away (see the mallctl sketch after this hunk).
set(JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu")
add_subdirectory (jemalloc-cmake)
add_subdirectory (libcpuid-cmake)
add_subdirectory (murmurhash)
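To make the effect of these arena settings concrete, here is a minimal C++ sketch (not part of this commit; it assumes a binary linked against jemalloc, e.g. with `-ljemalloc`) that reads the relevant options back through jemalloc's `mallctl` API:

```cpp
#include <jemalloc/jemalloc.h>
#include <cstdio>

int main()
{
    // opt.narenas: the arena limit mentioned above (4*CPU by default).
    unsigned narenas = 0;
    size_t len = sizeof(narenas);
    if (mallctl("opt.narenas", &narenas, &len, nullptr, 0) == 0)
        std::printf("opt.narenas = %u\n", narenas);

    // opt.percpu_arena: "percpu" means one arena per CPU.
    const char * percpu_arena = nullptr;
    len = sizeof(percpu_arena);
    if (mallctl("opt.percpu_arena", &percpu_arena, &len, nullptr, 0) == 0)
        std::printf("opt.percpu_arena = %s\n", percpu_arena);
}
```

With the `percpu_arena:percpu` config string above, the second option should report `percpu` at run time.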


@ -9,6 +9,9 @@ if (ENABLE_JEMALLOC)
option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED})
if (USE_INTERNAL_JEMALLOC)
option (JEMALLOC_CONFIG_MALLOC_CONF "Change default configuration string" "")
message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}")
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc")
set (SRCS
@ -52,11 +55,19 @@ if (ENABLE_JEMALLOC)
add_library(jemalloc ${SRCS})
target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include)
target_include_directories(jemalloc SYSTEM PUBLIC include)
set(JEMALLOC_INCLUDE)
if (ARCH_AMD64)
target_include_directories(jemalloc SYSTEM PUBLIC include_linux_x86_64)
set(JEMALLOC_INCLUDE_PREFIX include_linux_x86_64)
elseif (ARCH_ARM)
target_include_directories(jemalloc SYSTEM PUBLIC include_linux_aarch64)
set(JEMALLOC_INCLUDE_PREFIX include_linux_aarch64)
endif ()
target_include_directories(jemalloc SYSTEM PUBLIC
${JEMALLOC_INCLUDE_PREFIX})
configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in
${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h)
target_include_directories(jemalloc SYSTEM PRIVATE
${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal)
target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)


@ -5,3 +5,4 @@ Added #define GNU_SOURCE
Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not.
Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard.
Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5.
Added JEMALLOC_CONFIG_MALLOC_CONF substitution.


@ -369,7 +369,7 @@
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF ""
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1


@ -5,3 +5,4 @@ Added #define GNU_SOURCE
Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not.
Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard.
Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5.
Added JEMALLOC_CONFIG_MALLOC_CONF substitution.


@ -360,7 +360,7 @@
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF ""
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1

debian/rules vendored

@ -24,6 +24,10 @@ DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT)
ifndef ENABLE_TESTS
CMAKE_FLAGS += -DENABLE_TESTS=0
else
# To export binaries from the deb build we do not strip them. No need to run tests in the deb build, as we run them in CI
DEB_BUILD_OPTIONS+= nocheck
DEB_BUILD_OPTIONS+= nostrip
endif
ifndef MAKE_TARGET
@ -88,14 +92,19 @@ override_dh_auto_build:
$(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server
endif
override_dh_clean:
rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs
dh_clean -X contrib
dh_clean # -X contrib
override_dh_strip:
#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options
ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS)))
dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg
endif
override_dh_install:
# Making docs


@ -5,6 +5,7 @@ RUN apt-get --allow-unauthenticated update -y && apt-get install --yes wget gnup
RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
RUN echo "deb [trusted=yes] http://apt.llvm.org/eoan/ llvm-toolchain-eoan-10 main" >> /etc/apt/sources.list
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
@ -17,6 +18,14 @@ RUN apt-get --allow-unauthenticated update -y \
apt-transport-https \
ca-certificates
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and is compatible with old systems
RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
RUN chmod +x dpkg-deb
RUN cp dpkg-deb /usr/bin
# Libraries from the OS are only needed to test the "unbundled" build (which is not used in production).
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
@ -74,12 +83,6 @@ RUN apt-get --allow-unauthenticated update -y \
libldap2-dev
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and is compatible with old systems
RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
RUN chmod +x dpkg-deb
RUN cp dpkg-deb /usr/bin
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-10 /usr/bin/ld.lld


@ -10,5 +10,16 @@ mv *.changes /output
mv *.buildinfo /output
mv /*.rpm /output ||: # if exists
mv /*.tgz /output ||: # if exists
if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;}
then
echo Place $BINARY_OUTPUT to output
mkdir /output/binary ||: # if exists
mv /build/obj-*/programs/clickhouse* /output/binary
if [ "$BINARY_OUTPUT" = "tests" ]
then
mv /build/obj-*/src/unit_tests_dbms /output/binary
fi
fi
ccache --show-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:


@ -54,7 +54,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
subprocess.check_call(cmd, shell=True)
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage):
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
CLANG_PREFIX = "clang"
DARWIN_SUFFIX = "-darwin"
ARM_SUFFIX = "-aarch64"
@ -131,6 +131,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
if alien_pkgs:
result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'")
if with_binaries == "programs":
result.append('BINARY_OUTPUT=programs')
elif with_binaries == "tests":
result.append('ENABLE_TESTS=1')
result.append('BINARY_OUTPUT=tests')
cmake_flags.append('-DENABLE_TESTS=1')
cmake_flags.append('-DUSE_GTEST=1')
if unbundled:
# TODO: fix build with ENABLE_RDKAFKA
cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0')
@ -179,6 +187,7 @@ if __name__ == "__main__":
parser.add_argument("--official", action="store_true")
parser.add_argument("--alien-pkgs", nargs='+', default=[])
parser.add_argument("--with-coverage", action="store_true")
parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="")
args = parser.parse_args()
if not os.path.isabs(args.output_dir):
@ -195,6 +204,12 @@ if __name__ == "__main__":
if args.alien_pkgs and not image_type == "deb":
raise Exception("Can add alien packages only in deb build")
if args.with_binaries != "" and not image_type == "deb":
raise Exception("Can add additional binaries only in deb build")
if args.with_binaries != "" and image_type == "deb":
logging.info("Should place {} to output".format(args.with_binaries))
dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile")
if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image:
if not pull_image(image_name) or args.force_build_image:
@ -202,6 +217,6 @@ if __name__ == "__main__":
env_prepared = parse_env_variables(
args.build_type, args.compiler, args.sanitizer, args.package_type, image_type,
args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy,
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage)
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries)
run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir)
logging.info("Output placed into {}".format(args.output_dir))


@ -8,3 +8,4 @@ services:
MONGO_INITDB_ROOT_PASSWORD: clickhouse
ports:
- 27018:27017
command: --profile=2 --verbose


@ -133,7 +133,7 @@ function run_tests
fi
# Delete old report files.
for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv
for x in {test-times,wall-clock-times}.tsv
do
rm -v "$x" ||:
touch "$x"
@ -217,69 +217,136 @@ function get_profiles
clickhouse-client --port 9002 --query "select 1"
}
function build_log_column_definitions
{
# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself
# (see the sketch after this function).
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
do
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
}
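For clarity, here is a rough standalone C++ equivalent of what the sed/paste pipeline above produces (illustrative only; the real script writes the result into the `.columns` files consumed by `file(..., TSVWithNamesAndTypes, ...)`):

```cpp
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split a tab-separated line into fields.
static std::vector<std::string> splitTabs(const std::string & line)
{
    std::vector<std::string> fields;
    std::istringstream ss(line);
    for (std::string field; std::getline(ss, field, '\t');)
        fields.push_back(field);
    return fields;
}

int main(int argc, char ** argv)
{
    if (argc < 2)
        return 1;

    // TSVWithNamesAndTypes: line 1 holds column names, line 2 holds types.
    std::ifstream in(argv[1]);
    std::string names_line, types_line;
    std::getline(in, names_line);
    std::getline(in, types_line);

    const auto names = splitTabs(names_line);
    const auto types = splitTabs(types_line);

    // Emit the same '"name" Type, "name" Type, ...' string that the
    // shell pipeline writes into the "$x.columns" files.
    for (size_t i = 0; i < names.size() && i < types.size(); ++i)
        std::cout << (i ? ", " : "") << '"' << names[i] << "\" " << types[i];
    std::cout << '\n';
}
```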
# Build and analyze randomization distribution for all queries.
function analyze_queries
{
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv client-times.tsv report-thresholds.tsv ||:
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||:
rm -rfv analyze ||:
mkdir analyze ||:
build_log_column_definitions
# Split the raw test output into files suitable for analysis.
IFS=$'\n'
for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print)
do
test_name=$(basename "$test_file" "-raw.tsv")
sed -n "s/^query\t//p" < "$test_file" > "$test_name-queries.tsv"
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "client-times.tsv"
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "report-thresholds.tsv"
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "skipped-tests.tsv"
sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv"
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv"
done
unset IFS
# for each query run, prepare array of metrics from query log
clickhouse-local --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")');
create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "right-query-log.tsv.columns")');
create table query_metrics engine File(TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics.tsv')
as select
test, query_index, 0 run, version,
[
-- server-reported time
query_duration_ms / toFloat64(1000)
, toFloat64(memory_usage)
-- client-reported time
, query_runs.time
] metrics
from (
select *, 0 version from left_query_log
union all
select *, 1 version from right_query_log
) query_logs
right join query_runs
using (query_id, version)
order by test, query_index
;
"
# This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with
# `order by rand`, and it becomes really slow if I do it for more than one
# query. We also don't have lateral joins. So I just put all runs of each
# query into a separate file, and then compute randomization distribution
# for each file. I do this in parallel using GNU parallel. A standalone
# sketch of this randomization test follows the function.
query_index=1
IFS=$'\n'
for test_file in $(find . -maxdepth 1 -name "*-queries.tsv" -print)
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
do
test_name=$(basename "$test_file" "-queries.tsv")
query_index=1
for query in $(cut -d' ' -f1 "$test_file" | sort | uniq)
do
query_prefix="$test_name.q$query_index"
query_index=$((query_index + 1))
grep -F "$query " "$test_file" > "$query_prefix.tmp"
printf "%s\0\n" \
"clickhouse-local \
--file \"$query_prefix.tmp\" \
--structure 'query text, run int, version UInt32, time float' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"$test_name-report.tsv\"" \
2>> analyze-errors.log \
>> analyze-commands.txt
done
file="analyze/$(echo "$prefix" | sed 's/\t/_/g').tmp"
grep "^$prefix " "analyze/query-run-metrics.tsv" > "$file" &
printf "%s\0\n" \
"clickhouse-local \
--file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-reports.tsv\"" \
2>> analyze/errors.log \
>> analyze/commands.txt
done
wait
unset IFS
parallel --null < analyze-commands.txt
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
}
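The comments above describe a randomization (permutation) test. As a self-contained illustration of what eqmed.sql computes for a single query, here is a C++ sketch with synthetic numbers and a single scalar metric (an assumption for illustration; the real query handles arrays of metrics and 100000 virtual runs):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>
#include <vector>

// Median of a sample (takes a copy so the caller's order is preserved).
static double median(std::vector<double> v)
{
    std::sort(v.begin(), v.end());
    const size_t n = v.size();
    return n % 2 ? v[n / 2] : (v[n / 2 - 1] + v[n / 2]) / 2;
}

int main()
{
    // Synthetic runs of one query on the 'left' and 'right' servers.
    std::vector<double> left  = {0.101, 0.098, 0.103, 0.100, 0.099};
    std::vector<double> right = {0.110, 0.112, 0.108, 0.111, 0.109};

    const double observed = std::abs(median(right) - median(left));

    // Pool all measurements and randomly relabel them many times,
    // recording |median difference| for each virtual run.
    std::vector<double> pooled = left;
    pooled.insert(pooled.end(), right.begin(), right.end());

    std::mt19937 rng(42);
    std::vector<double> diffs;
    for (int run = 0; run < 10000; ++run)
    {
        std::shuffle(pooled.begin(), pooled.end(), rng);
        const std::vector<double> a(pooled.begin(), pooled.begin() + left.size());
        const std::vector<double> b(pooled.begin() + left.size(), pooled.end());
        diffs.push_back(std::abs(median(b) - median(a)));
    }

    // The 0.999 quantile of the randomization distribution is the
    // significance threshold (the stat_threshold column in the reports).
    std::sort(diffs.begin(), diffs.end());
    const double threshold = diffs[static_cast<size_t>(diffs.size() * 0.999)];

    std::printf("observed %.4f, threshold %.4f -> %s\n",
                observed, threshold,
                observed > threshold ? "significant" : "not significant");
}
```

If the observed difference between server medians exceeds that quantile, the change is reported as significant.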
# Analyze results
function report
{
rm -r report ||:
mkdir report ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
cat analyze-errors.log >> report/errors.log ||:
build_log_column_definitions
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
clickhouse-local --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create table query_metric_stats engine File(TSVWithNamesAndTypes,
'report/query-metric-stats.tsv') as
select *, metric_name
from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
left array join ['server_time', 'memory', 'client_time'] as metric_name,
left, right, diff, stat_threshold
left join query_display_names
on reports.test = query_display_names.test
and reports.query_index = query_display_names.query_index
;
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
@ -296,53 +363,54 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
left, right, diff, stat_threshold,
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
reports.test,
query
from
(
select *,
replaceAll(_file, '-report.tsv', '') test
from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text')
) reports
left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds
using test
;
test, query_index, query_display_name
from query_metric_stats
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
where metric_name = 'server_time'
order by test, query_index, metric_name
;
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- save all test runs as JSON for the new comparison page
create table all_query_funs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query, versions_runs[1] runs_left, versions_runs[2] runs_right
create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query_display_name query,
versions_runs[1] runs_left, versions_runs[2] runs_right
from (
select
test, query,
test, query_index,
groupArrayInsertAt(runs, version) versions_runs
from (
select
replaceAll(_file, '-queries.tsv', '') test,
query, version,
groupArray(time) runs
from file('*-queries.tsv', TSV, 'query text, run int, version UInt32, time float')
group by test, query, version
test, query_index, version,
groupArray(metrics[1]) runs
from file('analyze/query-run-metrics.tsv', TSV,
'test text, query_index int, run int, version UInt8, metrics Array(float)')
group by test, query_index, version
)
group by test, query
)
group by test, query_index
) runs
left join query_display_names using (test, query_index)
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show
order by abs(diff) desc;
select left, right, diff, stat_threshold, changed_fail, test, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show
order by stat_threshold desc;
select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as
select query, test from queries where unstable_show or changed_show
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
@ -350,23 +418,23 @@ create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
group by test having s > 0 order by s desc;
create table query_time engine Memory as select *
from file('client-times.tsv', TSV, 'test text, query text, client float, server float');
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table wall_clock engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, query
from query_time where p > 1.02 order by p desc;
select client, server, floor(client/server, 3) p, query_display_name
from query_time left join query_display_names using (test, query_index)
where p > 1.02 order by p desc;
create table test_time engine Memory as
select test, sum(client) total_client_time,
maxIf(client, not short) query_max,
minIf(client, not short) query_min,
count(*) queries,
sum(short) short_queries
from query_time full join queries
using test, query
count(*) queries, sum(short) short_queries
from query_time full join queries using (test, query_index)
group by test;
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
@ -378,40 +446,89 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
floor(real / queries, 3) avg_real_per_query,
floor(query_min, 3)
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
order by avg_real_per_query desc;
-- report for all queries page, only main metric
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query
from queries order by test, query;
stat_threshold, test, query_display_name
from queries order by test, query_display_name;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from query_metric_stats
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
do
# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself.
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
# Prepare source data for metrics and flamegraphs for unstable queries.
for version in {right,left}
do
clickhouse-local --query "
do
rm -rf data
clickhouse-local --query "
create view queries_for_flamegraph as
select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
'query text, test text');
'test text, query_index int');
create view query_runs as
with 0 as left, 1 as right
select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float')
where version = $version
;
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select test, query_index, query_display_name, query_id
from query_runs
join queries_for_flamegraph on
query_runs.test = queries_for_flamegraph.test
and query_runs.query_index = queries_for_flamegraph.query_index
left join query_display_names on
query_runs.test = query_display_names.test
and query_runs.query_index = query_display_names.query_index
;
create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-query-log.tsv.columns")');
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select
test, query_index, query_id,
ProfileEvents.Values value, ProfileEvents.Names metric
from query_log array join ProfileEvents
join unstable_query_runs using (query_id)
;
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
select
test, query_index, query_id,
v, n
from (
select
test, query_index, query_id,
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v
from query_log
join unstable_query_runs using (query_id)
)
array join v, n;
create view trace_log as select *
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-trace-log.tsv.columns")');
@ -423,104 +540,104 @@ create view addresses_src as select *
create table addresses_join_$version engine Join(any, left, address) as
select addr address, name from addresses_src;
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select query, query_id from query_log
where query in (select query from queries_for_flamegraph)
and query_id not like 'prewarm %'
;
create table unstable_query_log engine File(Vertical,
'unstable-query-log.$version.rep') as
select * from query_log
where query_id in (select query_id from unstable_query_runs);
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
from query_log array join ProfileEvents
where query_id in (select query_id from unstable_query_runs)
;
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
select v, n, query_id, query
from
(select
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v,
query,
query_id
from query_log
where query_id in (select query_id from unstable_query_runs))
array join n, v;
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
'unstable-run-traces.$version.rep') as
select
test, query_index, query_id,
count() value,
joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric,
unstable_query_runs.query_id,
any(unstable_query_runs.query) query
from unstable_query_runs
join trace_log on trace_log.query_id = unstable_query_runs.query_id
group by unstable_query_runs.query_id, metric
joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric
from trace_log
join unstable_query_runs using query_id
group by test, query_index, query_id, metric
order by count() desc
;
create table metric_devation engine File(TSVWithNamesAndTypes,
'metric-deviation.$version.rep') as
select query, floor((q[3] - q[1])/q[2], 3) d,
quantilesExact(0, 0.5, 1)(value) q, metric
from (select * from unstable_run_metrics
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
join queries_for_flamegraph using query
group by query, metric
having d > 0.5
order by query desc, d desc
-- first goes the key used to split the file with grep
select test, query_index, query_display_name,
d, q, metric
from (
select
test, query_index,
floor((q[3] - q[1])/q[2], 3) d,
quantilesExact(0, 0.5, 1)(value) q, metric
from (select * from unstable_run_metrics
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
group by test, query_index, metric
having d > 0.5
) metrics
left join unstable_query_runs using (test, query_index)
order by test, query_index, d desc
;
create table stacks engine File(TSV, 'stacks.$version.rep') as
select
query,
-- first goes the key used to split the file with grep
test, query_index, any(query_display_name),
-- next go the stacks in flamegraph format: 'func1;...;funcN count'
arrayStringConcat(
arrayMap(x -> joinGet(addresses_join_$version, 'name', x),
arrayMap(addr -> replaceRegexpOne(
joinGet(addresses_join_$version, 'name', addr),
-- This function is at the base of the stack, and its name changes
-- surprisingly often between builds, e.g. '__clone' or 'clone' or
-- even '__GI__clone'. This breaks differential flame graphs, so
-- filter it out here.
'^clone\\.S.*', 'clone.S (name filtered by comparison script)'
),
arrayReverse(trace)
),
';'
) readable_trace,
count()
count() c
from trace_log
join unstable_query_runs using query_id
group by query, trace
group by test, query_index, trace
order by test, query_index, trace
;
" 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
done
wait
# Create per-query flamegraphs and files with metrics
IFS=$'\n'
for version in {right,left}
do
for query in $(cut -d' ' -f1 "stacks.$version.rep" | sort | uniq)
for query in $(cut -d' ' -f1,2,3 "stacks.$version.rep" | sort | uniq)
do
query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
echo "$query_file" >> report/query-files.txt
# Build separate .svg flamegraph for each query.
# -F is somewhat unsafe because it might match not the beginning of the
# string, but this is unlikely and escaping the query for grep is a pain.
grep -F "$query " "stacks.$version.rep" \
| cut -d' ' -f 2- \
| cut -f 4- \
| sed 's/\t/ /g' \
| tee "$query_file.stacks.$version.rep" \
| ~/fg/flamegraph.pl > "$query_file.$version.svg" &
| ~/fg/flamegraph.pl --hash > "$query_file.$version.svg" &
# Copy metric stats into separate files as well.
# Ditto the above comment about -F.
grep -F "$query " "metric-deviation.$version.rep" \
| cut -f2- > "$query_file.$version.metrics.rep" &
| cut -f4- > "$query_file.$version.metrics.rep" &
done
done
wait
unset IFS
# Create differential flamegraphs.
IFS=$'\n'
for query_file in $(cat report/query-files.txt)
do
~/fg/difffolded.pl "$query_file.stacks.left.rep" "$query_file.stacks.right.rep" \
| tee "$query_file.stacks.diff.rep" \
| ~/fg/flamegraph.pl > "$query_file.diff.svg" &
done
unset IFS
wait
# Remember that grep sets error code when nothing is found, hence the bayan
# operator.
grep -H -m2 -i '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||:


@ -46,7 +46,13 @@ function download
done
mkdir ~/fg ||:
cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl &
(
cd ~/fg
wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl"
wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/difffolded.pl"
chmod +x ~/fg/difffolded.pl
chmod +x ~/fg/flamegraph.pl
) &
wait
}


@ -81,8 +81,13 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
fi
) | tee right-commit.txt
# Prepare the list of changed tests for use by compare.sh
git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST"~ master)" -- tests/performance | tee changed-tests.txt
if [ "$PR_TO_TEST" != "0" ]
then
# Prepare the list of tests changed in the PR for use by compare.sh. Compare to
# merge base, because master might be far in the future and have unrelated test
# changes.
git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST" master)" -- tests/performance | tee changed-tests.txt
fi
# Set python output encoding so that we can print queries with Russian letters.
export PYTHONIOENCODING=utf-8
@ -119,5 +124,5 @@ done
dmesg -T > dmesg.log
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} ./report
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
cp compare.log /output


@ -1,32 +1,37 @@
-- input is table(query text, run UInt32, version int, time float)
-- input is table(test text, query text, run UInt32, version int, metrics Array(float))
select
floor(original_medians_array.time_by_version[1], 4) l,
floor(original_medians_array.time_by_version[2], 4) r,
floor((r - l) / l, 3) diff_percent,
floor(threshold / l, 3) threshold_percent,
query
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded,
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded,
arrayMap(x, y -> floor((y - x) / x, 3), l, r) diff_percent,
arrayMap(x, y -> floor(x / y, 3), threshold, l) threshold_percent,
test, query
from
(
-- quantiles of randomization distributions
select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
select quantileExactForEach(0.999)(
arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
) threshold
---- uncomment to see what the distribution is really like
--, uniqExact(d) u
--, uniqExact(d.1) u
--, arraySort(x->x.1,
-- arrayZip(
-- (sumMap([d], [1]) as f).1,
-- (sumMap([d.1], [1]) as f).1,
-- f.2)) full_histogram
from
(
select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
-- make array 'random label' -> '[median metric]'
select virtual_run, groupArrayInsertAt(median_metrics, random_label) metrics_by_label
from (
select medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label
-- get [median metric] arrays among virtual runs, grouping by random label
select medianExactForEach(metrics) median_metrics, virtual_run, random_label
from (
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label -- randomly relabel measurements
-- randomly relabel measurements
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label
from (
select time, number virtual_run
select metrics, number virtual_run
from
-- strip the query away before the join -- it might be several kB long;
(select time, run, version from table) no_query,
(select metrics, run, version from table) no_query,
-- duplicate input measurements into many virtual runs
numbers(1, 100000) nn
-- for each virtual run, randomly reorder measurements
@ -40,19 +45,19 @@ from
-- this select aggregates by virtual_run
) rd,
(
select groupArrayInsertAt(median_time, version) time_by_version
select groupArrayInsertAt(median_metrics, version) medians_by_version
from
(
select medianExact(time) median_time, version
select medianExactForEach(metrics) median_metrics, version
from table
group by version
) original_medians
) original_medians_array,
(
select any(query) query from table
select any(test) test, any(query) query from table
) any_query,
(
select throwIf(uniq(query) != 1) from table
select throwIf(uniq((test, query)) != 1) from table
) check_single_query -- this subselect checks that there is only one query in the input table;
-- written this way so that it is not optimized away (#10523)
;


@ -29,6 +29,8 @@ parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS',
parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.')
args = parser.parse_args()
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
@ -141,19 +143,25 @@ test_queries = substitute_parameters(test_query_templates)
report_stage_end('substitute2')
for i, q in enumerate(test_queries):
for query_index, q in enumerate(test_queries):
query_prefix = f'{test_name}.query{query_index}'
# We have some crazy long queries (about 100kB), so trim them to a sane
# length.
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
query_display_name = q
if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({i})'
query_display_name = f'{query_display_name[:1000]}...({query_index})'
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
# Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc.
try:
for conn_index, c in enumerate(connections):
res = c.execute(q, query_id = f'prewarm {0} {query_display_name}')
print(f'prewarm\t{tsv_escape(query_display_name)}\t{conn_index}\t{c.last_query.elapsed}')
prewarm_id = f'{query_prefix}.prewarm0'
res = c.execute(q, query_id = prewarm_id)
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except:
# If prewarm fails for some query -- skip it, and try to test the others.
# This might happen if the new test introduces some function that the
@ -170,13 +178,14 @@ for i, q in enumerate(test_queries):
start_seconds = time.perf_counter()
server_seconds = 0
for run in range(0, args.runs):
run_id = f'{query_prefix}.run{run}'
for conn_index, c in enumerate(connections):
res = c.execute(q)
print(f'query\t{tsv_escape(query_display_name)}\t{run}\t{conn_index}\t{c.last_query.elapsed}')
res = c.execute(q, query_id = run_id)
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
server_seconds += c.last_query.elapsed
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{tsv_escape(query_display_name)}\t{client_seconds}\t{server_seconds}')
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
report_stage_end('benchmark')


@ -25,6 +25,9 @@ very_unstable_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
color_bad='#ffb0c0'
color_good='#b0d050'
header_template = """
<!DOCTYPE html>
<html>
@ -179,6 +182,16 @@ if args.report == 'main':
print_tested_commits()
run_error_rows = tsvRows('run-errors.tsv')
error_tests += len(run_error_rows)
printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows)
printSimpleTable('Slow on client',
['Client time, s', 'Server time, s', 'Ratio', 'Query'],
slow_on_client_rows)
def print_changes():
rows = tsvRows('report/changed-perf.tsv')
if not rows:
@ -188,8 +201,8 @@ if args.report == 'main':
print(tableStart('Changes in performance'))
columns = [
'Old, s.', # 0
'New, s.', # 1
'Old, s', # 0
'New, s', # 1
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 2
'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4
@ -205,10 +218,10 @@ if args.report == 'main':
if int(row[4]):
if float(row[2]) < 0.:
faster_queries += 1
attrs[2] = 'style="background: #00ff00"'
attrs[2] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = 'style="background: #ff0000"'
attrs[2] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
@ -218,12 +231,6 @@ if args.report == 'main':
print_changes()
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows)
printSimpleTable('Slow on client',
['Client time, s.', 'Server time, s.', 'Ratio', 'Query'],
slow_on_client_rows)
def print_unstable_queries():
global unstable_queries
global very_unstable_queries
@ -252,7 +259,7 @@ if args.report == 'main':
for r in unstable_rows:
if int(r[4]):
very_unstable_queries += 1
attrs[3] = 'style="background: #ffb0a0"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ''
@ -262,11 +269,7 @@ if args.report == 'main':
print_unstable_queries()
run_error_rows = tsvRows('run-errors.tsv')
error_tests += len(run_error_rows)
printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
skipped_tests_rows = tsvRows('skipped-tests.tsv')
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
printSimpleTable('Tests with most unstable queries',
@ -281,13 +284,13 @@ if args.report == 'main':
columns = [
'Test', #0
'Wall clock time, s.', #1
'Total client time, s.', #2
'Wall clock time, s', #1
'Total client time, s', #2
'Total queries', #3
'Ignored short queries', #4
'Longest query<br>(sum for all runs), s.', #5
'Avg wall clock time<br>(sum for all runs), s.', #6
'Shortest query<br>(sum for all runs), s.', #7
'Longest query<br>(sum for all runs), s', #5
'Avg wall clock time<br>(sum for all runs), s', #6
'Shortest query<br>(sum for all runs), s', #7
]
print(tableStart('Test times'))
@ -300,13 +303,13 @@ if args.report == 'main':
if float(r[6]) > 1.5 * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[6] = 'style="background: #ffb0a0"'
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
if float(r[5]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[5] = 'style="background: #ffb0a0"'
attrs[5] = f'style="background: {color_bad}"'
else:
attrs[5] = ''
@ -320,9 +323,9 @@ if args.report == 'main':
print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="all-queries.html">All queries</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
</p>
</body>
</html>
@ -382,8 +385,8 @@ elif args.report == 'all-queries':
columns = [
# Changed #0
# Unstable #1
'Old, s.', #2
'New, s.', #3
'Old, s', #2
'New, s', #3
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #4
'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6
@ -399,21 +402,21 @@ elif args.report == 'all-queries':
attrs[1] = None
for r in rows:
if int(r[1]):
attrs[6] = 'style="background: #ffb0a0"'
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
if int(r[0]):
if float(r[4]) > 0.:
attrs[4] = 'style="background: #ffb0a0"'
attrs[4] = f'style="background: {color_bad}"'
else:
attrs[4] = 'style="background: #adbdff"'
attrs[4] = f'style="background: {color_good}"'
else:
attrs[4] = ''
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = 'style="background: #ffb0a0"'
attrs[3] = 'style="background: #ffb0a0"'
attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
attrs[3] = ''
@ -428,9 +431,9 @@ elif args.report == 'all-queries':
print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="report.html">Main report</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
</p>
</body>
</html>


@ -1,6 +1,8 @@
---
toc_folder_title: Engines
toc_priority: 25
toc_title: hidden
toc_hidden: true
---
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##}


@ -72,7 +72,7 @@ Examples:
kafka_format = 'JSONEachRow',
kafka_num_consumers = 4;
CREATE TABLE queue2 (
CREATE TABLE queue3 (
timestamp UInt64,
level String,
message String


@ -1,6 +1,8 @@
---
toc_folder_title: F.A.Q.
toc_priority: 76
toc_title: hidden
toc_hidden: true
---


@ -28,9 +28,10 @@ There may be any number of space symbols between syntactical constructions (incl
## Comments {#comments}
ClickHouse supports either SQL-style and C-style comments.
SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
C-style are from `/*` to `*/`and can be multiline, spaces are not required either.
ClickHouse supports both SQL-style and C-style comments:
- SQL-style comments start with `--` and continue to the end of the line; the space after `--` can be omitted.
- C-style comments go from `/*` to `*/` and can be multiline; spaces are not required either.
## Keywords {#syntax-keywords}


@ -174,7 +174,7 @@ Upd. Still waiting for the removal of the old code, which
### 2.3. Moving columnar ser/de from DataType to Column {#perenos-stolbtsovogo-serde-iz-datatype-v-column}
In the queue.
In the queue. Anton Popov.
### 2.4. Moving LowCardinality from DataType to Column. Adding ColumnSparse {#perevod-lowcardinality-iz-datatype-v-column-dobavlenie-columnsparse}
@ -977,10 +977,10 @@ Q2.
[Vitaly Baranov](https://github.com/vitlibar) and Denis Glazachev, Altinity. Requires 12.1.
### 12.6. Information about users and quotas in a system table {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
### 12.6. + Information about users and quotas in a system table {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
[Vitaly Baranov](https://github.com/vitlibar). Requires 12.1.
There is a pull request. Q2.
There is a pull request. Q2. Done.
## 13. Resource isolation, multi-tenancy {#razdelenie-resursov-multi-tenancy}


@ -58,17 +58,6 @@ def build_for_lang(lang, args):
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
'language': lang,
'direction': 'rtl' if lang == 'fa' else 'ltr',
# TODO: cleanup
'feature': {
'tabs': False
},
'palette': {
'primary': 'white',
'accent': 'white'
},
'font': False,
'logo': 'images/logo.svg',
'favicon': 'assets/images/favicon.ico',
'static_templates': ['404.html'],
'extra': {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching


@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.9
nltk==3.5
nose==1.3.7
protobuf==3.12.0
protobuf==3.12.1
numpy==1.18.4
Pygments==2.5.2
pymdown-extensions==7.1
@ -30,7 +30,7 @@ PyYAML==5.3.1
repackage==0.7.3
requests==2.23.0
singledispatch==3.4.0.3
six==1.14.0
six==1.15.0
soupsieve==2.0.1
termcolor==1.1.0
tornado==5.1.1


@ -46,7 +46,7 @@ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/
sudo yum install clickhouse-server clickhouse-client
```
You can also download and install the packages manually from here: https://repo.yandex.ru/clickhouse/rpm/stable/x86\_64.
You can also download and install the packages manually from here: https://repo.yandex.ru/clickhouse/rpm/stable/x86_64.
### From Docker {#from-docker-image}


@ -1,3 +1,7 @@
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
# 'clickhouse' binary is a multi-purpose tool
# that contains multiple execution modes (client, server, etc.);
# each of them is built and linked as a separate library, defined below.
@ -201,3 +205,9 @@ endif ()
if (TARGET clickhouse-server AND TARGET copy-headers)
add_dependencies(clickhouse-server copy-headers)
endif ()
if (ENABLE_TESTS AND USE_GTEST)
set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor expression_analyzer)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS})
add_dependencies(clickhouse-bundle clickhouse-tests)
endif()


@ -289,7 +289,7 @@ private:
connection_entries.emplace_back(std::make_shared<Entry>(
connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings))));
pool.scheduleOrThrowOnError(std::bind(&Benchmark::thread, this, connection_entries));
pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); });
}
}
catch (...)
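The `std::bind` to lambda change above matches the modernize-avoid-bind rationale (cf. the .clang-tidy checks earlier in this commit): a lambda names its captures explicitly and is easier for the compiler to inline. A tiny standalone sketch of the same transformation (Worker and process are hypothetical names, not ClickHouse code):

```cpp
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

struct Worker
{
    void process(std::vector<int> batch) { std::cout << batch.size() << " items\n"; }

    // Stand-in for ThreadPool::scheduleOrThrowOnError.
    void submit(std::function<void()> job) { job(); }

    void run()
    {
        std::vector<int> batch{1, 2, 3};

        // Before: member function bound with std::bind; captures are implicit.
        submit(std::bind(&Worker::process, this, batch));

        // After: the lambda lists its captures; 'mutable' allows moving the
        // copied batch into the by-value parameter.
        submit([this, batch]() mutable { process(std::move(batch)); });
    }
};

int main()
{
    Worker{}.run();
}
```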


@ -485,7 +485,7 @@ private:
history_file = config().getString("history_file");
else
{
auto history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
if (history_file_from_env)
history_file = history_file_from_env;
else if (!home_path.empty())
@ -1480,7 +1480,7 @@ private:
"\033[1m↗\033[0m",
};
auto indicator = indicators[increment % 8];
const char * indicator = indicators[increment % 8];
if (!send_logs && written_progress_chars)
message << '\r';


@ -51,7 +51,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
{
std::string prompt{"Password for user (" + user + "): "};
char buf[1000] = {};
if (auto result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
password = result;
}


@ -5,6 +5,7 @@
#include <Client/Connection.h>
#include <IO/ConnectionTimeouts.h>
#include <common/LineReader.h>
#include <thread>
namespace DB


@ -442,7 +442,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
/// Collect all shards that contain partition piece number piece_number.
Strings piece_status_paths;
for (auto & shard : shards_with_partition)
for (const auto & shard : shards_with_partition)
{
ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second;
ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number];
@ -702,7 +702,7 @@ ASTPtr ClusterCopier::removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast
auto new_columns_list = std::make_shared<ASTColumns>();
new_columns_list->set(new_columns_list->columns, new_columns);
if (auto indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
if (const auto * indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
new_columns_list->set(new_columns_list->indices, indices->clone());
new_query.replace(new_query.columns_list, new_columns_list);


@ -94,7 +94,7 @@ void ClusterCopierApp::mainImpl()
StatusFile status_file(process_path + "/status");
ThreadStatus thread_status;
auto log = &logger();
auto * log = &logger();
LOG_INFO(log, "Starting clickhouse-copier ("
<< "id " << process_id << ", "
<< "host_id " << host_id << ", "


@ -260,7 +260,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
return res;
res.is_remote = 1;
for (auto & replica : replicas)
for (const auto & replica : replicas)
{
if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
{
@ -270,7 +270,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
}
res.hostname_difference = std::numeric_limits<size_t>::max();
for (auto & replica : replicas)
for (const auto & replica : replicas)
{
size_t difference = getHostNameDifference(local_hostname, replica.host_name);
res.hostname_difference = std::min(difference, res.hostname_difference);


@ -8,7 +8,6 @@
#include <Poco/NullChannel.h>
#include <Databases/DatabaseMemory.h>
#include <Storages/System/attachSystemTables.h>
#include <Interpreters/Context.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/loadMetadata.h>


@ -4,13 +4,12 @@
#include <Poco/Util/Application.h>
#include <memory>
#include <loggers/Loggers.h>
#include <Interpreters/Context.h>
namespace DB
{
class Context;
/// Lightweight Application for clickhouse-local
/// No networking, no extra configs and working directories, no pid and status files, no dictionaries, no logging.
/// Quiet mode by default


@ -937,10 +937,10 @@ public:
if (typeid_cast<const DataTypeFixedString *>(&data_type))
return std::make_unique<FixedStringModel>(seed);
if (auto type = typeid_cast<const DataTypeArray *>(&data_type))
if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));
if (auto type = typeid_cast<const DataTypeNullable *>(&data_type))
if (const auto * type = typeid_cast<const DataTypeNullable *>(&data_type))
return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params));
throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED);


@ -24,8 +24,8 @@ namespace
query.table_id.table_name = table_name;
query.columns = std::make_shared<ASTExpressionList>(',');
query.children.push_back(query.columns);
for (size_t i = 0; i < columns.size(); ++i)
query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(columns[i].name));
for (const auto & column : columns)
query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
std::stringstream ss;
IAST::FormatSettings settings(ss, true);


@ -195,7 +195,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output)
std::vector<ReadBufferPtr> read_buffers;
std::vector<ReadBuffer *> read_buffers_raw_ptr;
auto cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
auto * cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
if (!cascade_buffer)
throw Exception("Expected CascadeWriteBuffer", ErrorCodes::LOGICAL_ERROR);
@ -383,7 +383,7 @@ void HTTPHandler::processQuery(
{
auto push_memory_buffer_and_continue = [next_buffer = used_output.out_maybe_compressed] (const WriteBufferPtr & prev_buf)
{
auto prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
auto * prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
if (!prev_memory_buffer)
throw Exception("Expected MemoryWriteBuffer", ErrorCodes::LOGICAL_ERROR);


@ -28,7 +28,7 @@ HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string &
{
}
Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request) // override
Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
{
LOG_TRACE(log, "HTTP Request for " << name << ". "
<< "Method: " << request.getMethod()
@ -40,7 +40,7 @@ Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHand
for (auto & handler_factory : child_factories)
{
auto handler = handler_factory->createRequestHandler(request);
auto * handler = handler_factory->createRequestHandler(request);
if (handler != nullptr)
return handler;
}
@ -72,80 +72,98 @@ HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler
static inline auto createHandlersFactoryFromConfig(IServer & server, const std::string & name, const String & prefix)
{
auto main_handler_factory = new HTTPRequestHandlerFactoryMain(name);
auto main_handler_factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
try
Poco::Util::AbstractConfiguration::Keys keys;
server.config().keys(prefix, keys);
for (const auto & key : keys)
{
Poco::Util::AbstractConfiguration::Keys keys;
server.config().keys(prefix, keys);
if (!startsWith(key, "rule"))
throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
for (const auto & key : keys)
{
if (!startsWith(key, "rule"))
throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", "");
const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", "");
if (handler_type == "static")
main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key));
else if (handler_type == "dynamic_query_handler")
main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key));
else if (handler_type == "predefined_query_handler")
main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key));
else if (handler_type.empty())
throw Exception("Handler type in config is not specified here: " +
prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
else
throw Exception("Unknown handler type '" + handler_type +"' in config here: " +
prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER);
}
return main_handler_factory;
}
catch (...)
{
delete main_handler_factory;
throw;
if (handler_type == "static")
main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key));
else if (handler_type == "dynamic_query_handler")
main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key));
else if (handler_type == "predefined_query_handler")
main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key));
else if (handler_type.empty())
throw Exception("Handler type in config is not specified here: " +
prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
else
throw Exception("Unknown handler type '" + handler_type +"' in config here: " +
prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER);
}
return main_handler_factory.release();
}
static const auto ping_response_expression = "Ok.\n";
static const auto root_response_expression = "config://http_server_default_response";
static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(
IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
{
if (server.config().has("http_handlers"))
return createHandlersFactoryFromConfig(server, name, "http_handlers");
else
{
auto factory = (new HTTPRequestHandlerFactoryMain(name))
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
->attachStrictPath("/")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
->attachStrictPath("/ping")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>(server, "query"))->allowPostAndGetParamsRequest());
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
factory->addHandler(root_handler.release());
auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
factory->addHandler(ping_handler.release());
auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
factory->addHandler(replicas_status_handler.release());
auto query_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(server, "query");
query_handler->allowPostAndGetParamsRequest();
factory->addHandler(query_handler.release());
/// We check that prometheus handler will be served on current (default) port.
/// Otherwise it will be created separately, see below.
if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0)
factory->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
{
auto prometheus_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
factory->addHandler(prometheus_handler.release());
}
return factory;
return factory.release();
}
}
static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name)
{
return (new HTTPRequestHandlerFactoryMain(name))
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
->attachStrictPath("/")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
->attachStrictPath("/ping")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>(server))->allowPostAndGetParamsRequest());
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
factory->addHandler(root_handler.release());
auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
factory->addHandler(ping_handler.release());
auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
factory->addHandler(replicas_status_handler.release());
auto main_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>>(server);
main_handler->allowPostAndGetParamsRequest();
factory->addHandler(main_handler.release());
return factory.release();
}
Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name)
@ -155,9 +173,14 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As
else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory")
return createInterserverHTTPHandlerFactory(server, name);
else if (name == "PrometheusHandler-factory")
return (new HTTPRequestHandlerFactoryMain(name))->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
{
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
factory->addHandler(handler.release());
return factory.release();
}
throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR);
}
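
A short note on the refactoring above: it replaces bare `new` chains with a build-then-release pattern, so an exception thrown while a handler is being configured cannot leak it. A minimal self-contained sketch of the pattern, with hypothetical stand-ins (Handler for HandlingRuleHTTPHandlerFactory, Factory for HTTPRequestHandlerFactoryMain):

#include <memory>
#include <vector>

struct Handler
{
    Handler * attachStrictPath(const char *) { return this; }  /// configuration step, may throw in real code
};

struct Factory
{
    /// Takes raw-pointer ownership, like HTTPRequestHandlerFactoryMain::addHandler.
    void addHandler(Handler * handler) { children.push_back(handler); }
    ~Factory() { for (auto * handler : children) delete handler; }
    std::vector<Handler *> children;
};

int main()
{
    auto factory = std::make_unique<Factory>();
    auto handler = std::make_unique<Handler>();
    handler->attachStrictPath("/ping");      /// unique_ptr still owns the handler if this throws
    factory->addHandler(handler.release());  /// ownership passes to the factory only here
    return 0;                                /// factory's destructor frees the handlers
}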

View File

@ -46,7 +46,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next())
{
auto & table = iterator->table();
const auto & table = iterator->table();
StorageReplicatedMergeTree * table_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get());
if (!table_replicated)

View File

@ -28,7 +28,7 @@
#include <Compression/CompressionFactory.h>
#include <common/logger_useful.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include "TCPHandler.h"
@ -278,8 +278,11 @@ void TCPHandler::runImpl()
sendLogs();
sendEndOfStream();
query_scope.reset();
/// QueryState should be cleared before QueryScope, since otherwise
/// the MemoryTracker will be wrong for possible deallocations.
/// (i.e. deallocations from the Aggregator with two-level aggregation)
state.reset();
query_scope.reset();
}
catch (const Exception & e)
{
@ -359,8 +362,11 @@ void TCPHandler::runImpl()
try
{
query_scope.reset();
/// QueryState should be cleared before QueryScope, since otherwise
/// the MemoryTracker will be wrong for possible deallocations.
/// (i.e. deallocations from the Aggregator with two-level aggregation)
state.reset();
query_scope.reset();
}
catch (...)
{
@ -560,7 +566,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
}
{
PullingPipelineExecutor executor(pipeline);
PullingAsyncPipelineExecutor executor(pipeline);
CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread};
Block block;
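
The comment repeated in the two hunks above encodes a destruction-order rule: QueryState must release its tracked memory while query_scope is still alive, otherwise the MemoryTracker misses those deallocations. A minimal sketch of the same rule with hypothetical stand-in types:

#include <iostream>
#include <memory>

struct MemoryScope
{
    /// Stands in for query_scope: while alive, (de)allocations are attributed to the query.
    ~MemoryScope() { std::cout << "accounting scope closed\n"; }
};

struct QueryState
{
    /// Stands in for QueryState: owns buffers whose deallocation must be tracked.
    std::unique_ptr<char[]> buffers = std::make_unique<char[]>(1 << 20);
    ~QueryState() { std::cout << "state buffers freed\n"; }
};

int main()
{
    auto query_scope = std::make_unique<MemoryScope>();
    auto state = std::make_unique<QueryState>();

    state.reset();        /// free tracked memory while the scope still accounts for it
    query_scope.reset();  /// only then close the accounting scope
}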

View File

@ -4,6 +4,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnNullable.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -53,13 +54,13 @@ protected:
static void initFlag(AggregateDataPtr place) noexcept
{
if (result_is_nullable)
if constexpr (result_is_nullable)
place[0] = 0;
}
static void setFlag(AggregateDataPtr place) noexcept
{
if (result_is_nullable)
if constexpr (result_is_nullable)
place[0] = 1;
}
@ -72,7 +73,7 @@ public:
AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: IAggregateFunctionHelper<Derived>(arguments, params), nested_function{nested_function_}
{
if (result_is_nullable)
if constexpr (result_is_nullable)
prefix_size = nested_function->alignOfData();
else
prefix_size = 0;
@ -128,7 +129,7 @@ public:
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
bool flag = getFlag(place);
if (result_is_nullable)
if constexpr (result_is_nullable)
writeBinary(flag, buf);
if (flag)
nested_function->serialize(nestedPlace(place), buf);
@ -137,7 +138,7 @@ public:
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
{
bool flag = 1;
if (result_is_nullable)
if constexpr (result_is_nullable)
readBinary(flag, buf);
if (flag)
{
@ -148,7 +149,7 @@ public:
void insertResultInto(AggregateDataPtr place, IColumn & to) const override
{
if (result_is_nullable)
if constexpr (result_is_nullable)
{
ColumnNullable & to_concrete = assert_cast<ColumnNullable &>(to);
if (getFlag(place))
@ -194,13 +195,26 @@ public:
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
if (!column->isNullAt(row_num))
{
this->setFlag(place);
const IColumn * nested_column = &column->getNestedColumn();
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
}
}
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
const UInt8 * null_map = column->getNullMapData().data();
this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), &nested_column, null_map, arena);
if constexpr (result_is_nullable)
if (!memoryIsByte(null_map, batch_size, 1))
this->setFlag(place);
}
};
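
The addBatchSinglePlace override above sets the non-NULL flag only when the batch has at least one non-NULL row, i.e. the null map is not all ones. A self-contained analogue of that check; allBytesAre is a hypothetical stand-in for ClickHouse's memoryIsByte:

#include <cstddef>
#include <cstdint>
#include <cstdio>

/// True if every byte in [data, data + size) equals `byte` (stand-in for memoryIsByte).
static bool allBytesAre(const uint8_t * data, size_t size, uint8_t byte)
{
    for (size_t i = 0; i < size; ++i)
        if (data[i] != byte)
            return false;
    return true;
}

int main()
{
    const uint8_t null_map[4] = {1, 1, 0, 1};  /// 1 marks a NULL row, 0 a real value
    bool has_non_null = !allBytesAre(null_map, 4, 1);
    std::printf("set flag: %s\n", has_non_null ? "yes" : "no");  /// "yes": row 2 is non-NULL
}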

View File

@ -20,11 +20,72 @@ struct AggregateFunctionSumData
{
T sum{};
void add(T value)
void ALWAYS_INLINE add(T value)
{
sum += value;
}
/// Vectorized version
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
{
/// Compiler cannot unroll this loop, do it manually.
/// (at least for floats, most likely due to the lack of -fassociative-math)
/// Something around the number of SSE registers * the number of elements that fit in a register.
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
partial_sums[i] += ptr[i];
ptr += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
sum += partial_sums[i];
while (ptr < end)
{
sum += *ptr;
++ptr;
}
}
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
if (!null_map[i])
partial_sums[i] += ptr[i];
ptr += unroll_count;
null_map += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
sum += partial_sums[i];
while (ptr < end)
{
if (!*null_map)
sum += *ptr;
++ptr;
++null_map;
}
}
void merge(const AggregateFunctionSumData & rhs)
{
sum += rhs.sum;
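
The manual unrolling above exists because, without -fassociative-math, the compiler must keep float additions in order and cannot introduce independent accumulators itself. A standalone sketch of the same technique, reduced to Float64:

#include <cstddef>

double sumUnrolled(const double * __restrict ptr, size_t count)
{
    constexpr size_t unroll_count = 128 / sizeof(double);  /// 16 independent accumulators

    double partial_sums[unroll_count]{};
    const double * end = ptr + count;
    const double * unrolled_end = ptr + (count / unroll_count * unroll_count);

    while (ptr < unrolled_end)
    {
        /// Independent accumulators break the loop-carried dependency chain;
        /// reassociating float additions changes the result, so the compiler
        /// will not do this on its own.
        for (size_t i = 0; i < unroll_count; ++i)
            partial_sums[i] += ptr[i];
        ptr += unroll_count;
    }

    double sum = 0;
    for (size_t i = 0; i < unroll_count; ++i)
        sum += partial_sums[i];

    while (ptr < end)
        sum += *ptr++;

    return sum;
}
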
@ -55,21 +116,95 @@ struct AggregateFunctionSumKahanData
T sum{};
T compensation{};
void add(T value)
template <typename Value>
void ALWAYS_INLINE addImpl(Value value, T & out_sum, T & out_compensation)
{
auto compensated_value = value - compensation;
auto new_sum = sum + compensated_value;
compensation = (new_sum - sum) - compensated_value;
sum = new_sum;
auto compensated_value = value - out_compensation;
auto new_sum = out_sum + compensated_value;
out_compensation = (new_sum - out_sum) - compensated_value;
out_sum = new_sum;
}
void ALWAYS_INLINE add(T value)
{
addImpl(value, sum, compensation);
}
/// Vectorized version
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
{
/// Less than in the ordinary sum, because the algorithm is more complicated and too much loop unrolling is questionable.
/// But this is just a guess.
constexpr size_t unroll_count = 4;
T partial_sums[unroll_count]{};
T partial_compensations[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
addImpl(ptr[i], partial_sums[i], partial_compensations[i]);
ptr += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);
while (ptr < end)
{
addImpl(*ptr, sum, compensation);
++ptr;
}
}
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
constexpr size_t unroll_count = 4;
T partial_sums[unroll_count]{};
T partial_compensations[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
if (!null_map[i])
addImpl(ptr[i], partial_sums[i], partial_compensations[i]);
ptr += unroll_count;
null_map += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);
while (ptr < end)
{
if (!*null_map)
addImpl(*ptr, sum, compensation);
++ptr;
++null_map;
}
}
void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation)
{
auto raw_sum = to_sum + from_sum;
auto rhs_compensated = raw_sum - to_sum;
/// Kahan summation is tricky because it depends on non-associativity of float arithmetic.
/// Do not simplify this expression if you are not sure.
auto compensations = ((from_sum - rhs_compensated) + (to_sum - (raw_sum - rhs_compensated))) + compensation + from_compensation;
to_sum = raw_sum + compensations;
to_compensation = compensations - (to_sum - raw_sum);
}
void merge(const AggregateFunctionSumKahanData & rhs)
{
auto raw_sum = sum + rhs.sum;
auto rhs_compensated = raw_sum - sum;
auto compensations = ((rhs.sum - rhs_compensated) + (sum - (raw_sum - rhs_compensated))) + compensation + rhs.compensation;
sum = raw_sum + compensations;
compensation = compensations - (sum - raw_sum);
mergeImpl(sum, compensation, rhs.sum, rhs.compensation);
}
void write(WriteBuffer & buf) const
@ -141,6 +276,20 @@ public:
this->data(place).add(column.getData()[row_num]);
}
/// Vectorized version for the case when there are no GROUP BY keys.
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).addMany(column.getData().data(), batch_size);
}
void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
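
The compensated arithmetic above is worth a concrete check: adding ten million values of 1e-16 to 1.0 one by one is completely absorbed by naive summation, while the Kahan loop recovers the missing 1e-9. A standalone sketch (values chosen purely for illustration):

#include <cstdio>

static void kahanAdd(double value, double & sum, double & compensation)
{
    double compensated_value = value - compensation;
    double new_sum = sum + compensated_value;
    compensation = (new_sum - sum) - compensated_value;  /// recovers the lost low-order bits
    sum = new_sum;
}

int main()
{
    double naive = 1.0;
    double kahan = 1.0;
    double compensation = 0.0;

    for (int i = 0; i < 10000000; ++i)
    {
        naive += 1e-16;  /// absorbed: 1.0 + 1e-16 rounds back to 1.0 every time
        kahanAdd(1e-16, kahan, compensation);
    }

    std::printf("naive: %.17g\n", naive);  /// prints 1 exactly
    std::printf("kahan: %.17g\n", kahan);  /// close to 1.000000001
}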

View File

@ -145,6 +145,11 @@ public:
*/
virtual void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
/** The same for a single place, for when only filtered data needs to be aggregated.
*/
virtual void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0;
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
* -Array combinator. It might also be used generally to break data dependency when array
@ -201,6 +206,14 @@ public:
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const override
{
for (size_t i = 0; i < batch_size; ++i)
if (!null_map[i])
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
void addBatchArray(
size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena)
const override

View File

@ -50,6 +50,8 @@ class Connection;
using ConnectionPtr = std::shared_ptr<Connection>;
using Connections = std::vector<ConnectionPtr>;
using Scalars = std::map<String, Block>;
/// Packet that could be received from server.
struct Packet

View File

@ -56,8 +56,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(2);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n";
y = std::move(mut);
}
@ -75,8 +75,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(3);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n";
y = std::move(mut);
}

View File

@ -75,8 +75,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(2);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << mut.get() << "\n";
y = std::move(mut);
}
@ -94,8 +94,8 @@ int main(int, char **)
MutableColumnPtr mut = IColumn::mutate(std::move(y));
mut->set(3);
std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n";
std::cerr << "refcounts: " << x->use_count() << ", " << mut->use_count() << "\n";
std::cerr << "addresses: " << x.get() << ", " << ", " << mut.get() << "\n";
y = std::move(mut);
}

View File

@ -52,6 +52,8 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
M(SettingUInt64, min_insert_block_size_rows_for_materialized_views, 0, "Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows)", 0) \
M(SettingUInt64, min_insert_block_size_bytes_for_materialized_views, 0, "Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes)", 0) \
M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \
M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \
M(SettingUInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
@ -421,6 +423,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \
M(SettingBool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \
M(SettingSeconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \
M(SettingBool, materialize_ttl_after_modify, true, "Apply TTL for old data, after ALTER MODIFY TTL query", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\

View File

@ -3,12 +3,12 @@
#include <DataStreams/IBlockOutputStream.h>
#include <Columns/ColumnConst.h>
#include <Storages/ColumnDefault.h>
#include <Interpreters/Context.h>
namespace DB
{
class Context;
/** This stream adds three types of columns into the block
* 1. Columns that are missing from the request but present in the table without defaults (missed columns)

View File

@ -2,12 +2,13 @@
#include <DataStreams/IBlockInputStream.h>
#include <Storages/ColumnDefault.h>
#include <Interpreters/Context.h>
namespace DB
{
class Context;
/// Adds defaults to columns using BlockDelayedDefaults bitmask attached to Block by child InputStream.
class AddingDefaultsBlockInputStream : public IBlockInputStream
{

View File

@ -1,9 +1,31 @@
#include <DataStreams/BlockIO.h>
#include <Interpreters/ProcessList.h>
#include <Processors/Executors/PipelineExecutingBlockInputStream.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
BlockInputStreamPtr BlockIO::getInputStream()
{
if (out)
throw Exception("Cannot get input stream from BlockIO because output stream is not empty",
ErrorCodes::LOGICAL_ERROR);
if (in)
return in;
if (pipeline.initialized())
return std::make_shared<PipelineExecutingBlockInputStream>(std::move(pipeline));
throw Exception("Cannot get input stream from BlockIO because query pipeline was not initialized",
ErrorCodes::LOGICAL_ERROR);
}
void BlockIO::reset()
{
/** process_list_entry should be destroyed after in, after out and after pipeline,

View File

@ -50,6 +50,9 @@ struct BlockIO
exception_callback();
}
/// Returns in or converts pipeline to stream. Throws if out is not empty.
BlockInputStreamPtr getInputStream();
private:
void reset();
};

View File

@ -1,4 +1,5 @@
#include <Interpreters/Set.h>
#include <Interpreters/Context.h>
#include <DataStreams/materializeBlock.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/CreatingSetsBlockInputStream.h>

View File

@ -8,7 +8,6 @@
#include <IO/ReadBuffer.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <Processors/Formats/InputStreamFromInputFormat.h>
#include <Interpreters/Context.h>
namespace DB
{

View File

@ -5,6 +5,7 @@
#include <DataTypes/NestedUtils.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTInsertQuery.h>
#include <Common/CurrentThread.h>
#include <Common/setThreadName.h>
@ -40,10 +41,20 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
/// We need special context for materialized views insertions
if (!dependencies.empty())
{
views_context = std::make_unique<Context>(context);
select_context = std::make_unique<Context>(context);
insert_context = std::make_unique<Context>(context);
const auto & insert_settings = insert_context->getSettingsRef();
// Do not deduplicate insertions into MV if the main insertion is Ok
if (disable_deduplication_for_children)
views_context->setSetting("insert_deduplicate", false);
insert_context->setSetting("insert_deduplicate", false);
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
if (insert_settings.min_insert_block_size_rows_for_materialized_views.changed)
insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value);
if (insert_settings.min_insert_block_size_bytes_for_materialized_views.changed)
insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value);
}
for (const auto & database_table : dependencies)
@ -67,7 +78,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
insert->table_id = inner_table_id;
/// Get list of columns we get from select query.
auto header = InterpreterSelectQuery(query, *views_context, SelectQueryOptions().analyze())
auto header = InterpreterSelectQuery(query, *select_context, SelectQueryOptions().analyze())
.getSampleBlock();
/// Insert only columns returned by select.
@ -81,14 +92,14 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
insert->columns = std::move(list);
ASTPtr insert_query_ptr(insert.release());
InterpreterInsertQuery interpreter(insert_query_ptr, *views_context);
InterpreterInsertQuery interpreter(insert_query_ptr, *insert_context);
BlockIO io = interpreter.execute();
out = io.out;
}
else if (dynamic_cast<const StorageLiveView *>(dependent_table.get()))
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *views_context, ASTPtr(), true);
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr(), true);
else
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *views_context, ASTPtr());
out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr());
views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr});
}
@ -258,7 +269,7 @@ void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_n
/// but it will contain single block (that is INSERT-ed into main table).
/// InterpreterSelectQuery will do processing of alias columns.
Context local_context = *views_context;
Context local_context = *select_context;
local_context.addViewSource(
StorageValues::create(
storage->getStorageID(), storage->getColumns(), block, storage->getVirtuals()));
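
The new settings follow a simple rule: the MV-specific value replaces the generic min_insert_block_size_rows/bytes for child inserts only when it was explicitly changed. A self-contained sketch of that rule, with a hypothetical Settings type in place of the real Context machinery:

#include <cassert>
#include <cstdint>

struct Setting
{
    uint64_t value = 0;
    bool changed = false;
    void set(uint64_t v) { value = v; changed = true; }
};

struct Settings
{
    Setting min_insert_block_size_rows;
    Setting min_insert_block_size_rows_for_materialized_views;
};

/// Mirrors the insert_context setup above: copy, then override only if changed.
Settings makeInsertContextSettings(Settings settings)
{
    if (settings.min_insert_block_size_rows_for_materialized_views.changed)
        settings.min_insert_block_size_rows.set(settings.min_insert_block_size_rows_for_materialized_views.value);
    return settings;
}

int main()
{
    Settings base;
    base.min_insert_block_size_rows.set(1048576);

    /// Default: child inserts inherit the generic value.
    assert(makeInsertContextSettings(base).min_insert_block_size_rows.value == 1048576);

    /// An explicitly set MV-specific value wins for the children.
    base.min_insert_block_size_rows_for_materialized_views.set(4096);
    assert(makeInsertContextSettings(base).min_insert_block_size_rows.value == 4096);
}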

View File

@ -44,7 +44,8 @@ private:
};
std::vector<ViewInfo> views;
std::unique_ptr<Context> views_context;
std::unique_ptr<Context> select_context;
std::unique_ptr<Context> insert_context;
void process(const Block & block, size_t view_num);
};

View File

@ -4,6 +4,7 @@
#include <DataStreams/IBlockOutputStream.h>
#include <Common/Throttler.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/ClientInfo.h>
namespace DB

View File

@ -34,22 +34,25 @@ TTLBlockInputStream::TTLBlockInputStream(
const auto & storage_columns = storage.getColumns();
const auto & column_defaults = storage_columns.getDefaults();
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
for (const auto & [name, _] : storage.column_ttl_entries_by_name)
{
auto it = column_defaults.find(name);
if (it != column_defaults.end())
{
auto column = storage_columns.get(name);
auto expression = it->second.expression->clone();
default_expr_list->children.emplace_back(setAlias(addTypeConversionToAST(std::move(expression), column.type->getName()), it->first));
}
}
for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl)
{
if (force || isTTLExpired(ttl_info.min))
{
new_ttl_infos.columns_ttl.emplace(name, IMergeTreeDataPart::TTLInfo{});
empty_columns.emplace(name);
auto it = column_defaults.find(name);
if (it != column_defaults.end())
{
auto column = storage_columns.get(name);
auto expression = it->second.expression->clone();
default_expr_list->children.emplace_back(setAlias(addTypeConversionToAST(std::move(expression), column.type->getName()), it->first));
}
}
else
new_ttl_infos.columns_ttl.emplace(name, ttl_info);

View File

@ -376,8 +376,10 @@ void registerDataTypeString(DataTypeFactory & factory)
/// These synonyms are added for compatibility.
factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("NCHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("NVARCHAR", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("VARCHAR2", "String", DataTypeFactory::CaseInsensitive); /// Oracle
factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive);
factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive);

View File

@ -8,6 +8,7 @@
#include <Parsers/formatAST.h>
#include <Common/renameat2.h>
#include <Storages/StorageMaterializedView.h>
#include <Interpreters/Context.h>
#include <filesystem>

View File

@ -1,7 +1,6 @@
#pragma once
#include <Databases/DatabaseOnDisk.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
@ -10,6 +9,7 @@ namespace DB
class DatabaseLazyIterator;
class Context;
/** Lazy engine of databases.
* Works like DatabaseOrdinary, but stores only the cache in memory.

View File

@ -5,6 +5,7 @@
#include <Parsers/ASTCreateQuery.h>
#include <Storages/IStorage.h>
#include <Poco/File.h>
#include <filesystem>
namespace DB
@ -84,4 +85,10 @@ UUID DatabaseMemory::tryGetTableUUID(const String & table_name) const
return UUIDHelpers::Nil;
}
void DatabaseMemory::drop(const Context & context)
{
/// Remove data on explicit DROP DATABASE
std::filesystem::remove_all(context.getPath() + data_path);
}
}

View File

@ -46,6 +46,8 @@ public:
UUID tryGetTableUUID(const String & table_name) const override;
void drop(const Context & context) override;
private:
String data_path;
using NameToASTCreate = std::unordered_map<String, ASTPtr>;

View File

@ -5,14 +5,16 @@
#include <mysqlxx/Pool.h>
#include <Databases/DatabasesCommon.h>
#include <Interpreters/Context.h>
#include <memory>
#include <Parsers/ASTCreateQuery.h>
#include <Common/ThreadPool.h>
namespace DB
{
class Context;
/** Real-time access to table list and table structure from remote MySQL
* It doesn't perform any manipulations with the filesystem.
* All tables are created by the calling code after pulling the structure from the remote MySQL in real time

View File

@ -3,7 +3,6 @@
#include <Common/escapeForFileName.h>
#include <Common/quoteString.h>
#include <Databases/DatabasesCommon.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Storages/IStorage.h>
@ -11,6 +10,8 @@
namespace DB
{
class Context;
std::pair<String, StoragePtr> createTableFromAST(
ASTCreateQuery ast_create_query,
const String & database_name,

View File

@ -5,6 +5,9 @@
namespace DB
{
class Context;
class ExternalDictionariesLoader;
class DatabaseWithDictionaries : public DatabaseOnDisk
{

View File

@ -131,10 +131,10 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadAll()
*/
if (is_local)
{
BlockIO res = executeQuery(load_all_query, context, true, QueryProcessingStage::Complete, false, false);
auto stream = executeQuery(load_all_query, context, true).getInputStream();
/// FIXME res.in may implicitly use some objects owned by res, but they will be destructed after return
res.in = std::make_shared<ConvertingBlockInputStream>(res.in, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position);
return res.in;
stream = std::make_shared<ConvertingBlockInputStream>(stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position);
return stream;
}
return std::make_shared<RemoteBlockInputStream>(pool, load_all_query, sample_block, context);
}
@ -144,9 +144,9 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadUpdatedAll()
std::string load_update_query = getUpdateFieldAndDate();
if (is_local)
{
auto res = executeQuery(load_update_query, context, true, QueryProcessingStage::Complete, false, false);
res.in = std::make_shared<ConvertingBlockInputStream>(res.in, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position);
return res.in;
auto stream = executeQuery(load_update_query, context, true).getInputStream();
stream = std::make_shared<ConvertingBlockInputStream>(stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position);
return stream;
}
return std::make_shared<RemoteBlockInputStream>(pool, load_update_query, sample_block, context);
}
@ -191,10 +191,10 @@ BlockInputStreamPtr ClickHouseDictionarySource::createStreamForSelectiveLoad(con
{
if (is_local)
{
auto res = executeQuery(query, context, true, QueryProcessingStage::Complete, false, false);
res.in = std::make_shared<ConvertingBlockInputStream>(
res.in, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position);
return res.in;
auto res = executeQuery(query, context, true).getInputStream();
res = std::make_shared<ConvertingBlockInputStream>(
res, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Position);
return res;
}
return std::make_shared<RemoteBlockInputStream>(pool, query, sample_block, context);
@ -206,8 +206,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re
if (is_local)
{
Context query_context = context;
auto input_block = executeQuery(request, query_context, true,
QueryProcessingStage::Complete, false, false).in;
auto input_block = executeQuery(request, query_context, true).getInputStream();
return readInvalidateQuery(*input_block);
}
else

View File

@ -5,6 +5,8 @@
#include "DictionaryStructure.h"
#include "getDictionaryConfigurationFromAST.h"
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
@ -41,6 +43,9 @@ DictionaryPtr DictionaryFactory::create(
const DictionaryStructure dict_struct{config, config_prefix + ".structure"};
DictionarySourcePtr source_ptr = DictionarySourceFactory::instance().create(name, config, config_prefix + ".source", dict_struct, context, check_source_config);
LOG_TRACE(&Poco::Logger::get("DictionaryFactory"),
"Created dictionary source '" << source_ptr->toString()
<< "' for dictionary '" << name << "'");
const auto & layout_type = keys.front();

View File

@ -8,25 +8,40 @@ namespace DB
void registerDictionarySourceMongoDB(DictionarySourceFactory & factory)
{
auto create_table_source = [=](const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
const Context & /* context */,
bool /* check_config */) -> DictionarySourcePtr {
return std::make_unique<MongoDBDictionarySource>(dict_struct, config, config_prefix + ".mongodb", sample_block);
auto create_mongo_db_dictionary = [](
const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & root_config_prefix,
Block & sample_block,
const Context &,
bool /* check_config */)
{
const auto config_prefix = root_config_prefix + ".mongodb";
return std::make_unique<MongoDBDictionarySource>(dict_struct,
config.getString(config_prefix + ".uri", ""),
config.getString(config_prefix + ".host", ""),
config.getUInt(config_prefix + ".port", 0),
config.getString(config_prefix + ".user", ""),
config.getString(config_prefix + ".password", ""),
config.getString(config_prefix + ".method", ""),
config.getString(config_prefix + ".db", ""),
config.getString(config_prefix + ".collection"),
sample_block);
};
factory.registerSource("mongodb", create_table_source);
factory.registerSource("mongodb", create_mongo_db_dictionary);
}
}
#include <common/logger_useful.h>
#include <Poco/MongoDB/Array.h>
#include <Poco/MongoDB/Connection.h>
#include <Poco/MongoDB/Cursor.h>
#include <Poco/MongoDB/Database.h>
#include <Poco/MongoDB/ObjectId.h>
#include <Poco/URI.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Version.h>
@ -155,6 +170,7 @@ authenticate(Poco::MongoDB::Connection & connection, const std::string & databas
MongoDBDictionarySource::MongoDBDictionarySource(
const DictionaryStructure & dict_struct_,
const std::string & uri_,
const std::string & host_,
UInt16 port_,
const std::string & user_,
@ -164,6 +180,7 @@ MongoDBDictionarySource::MongoDBDictionarySource(
const std::string & collection_,
const Block & sample_block_)
: dict_struct{dict_struct_}
, uri{uri_}
, host{host_}
, port{port_}
, user{user_}
@ -172,43 +189,56 @@ MongoDBDictionarySource::MongoDBDictionarySource(
, db{db_}
, collection{collection_}
, sample_block{sample_block_}
, connection{std::make_shared<Poco::MongoDB::Connection>(host, port)}
, connection{std::make_shared<Poco::MongoDB::Connection>()}
{
if (!user.empty())
if (!uri.empty())
{
#if POCO_VERSION >= 0x01070800
Poco::MongoDB::Database poco_db(db);
if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method))
throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
#else
authenticate(*connection, db, user, password);
#endif
Poco::URI poco_uri(uri);
// Parse database from URI. This is required for correctness -- the
// cursor is created using database name and collection name, so we have
// to specify them properly.
db = poco_uri.getPath();
// getPath() may return a leading slash, remove it.
if (!db.empty() && db[0] == '/')
{
db.erase(0, 1);
}
// Parse some other parts from URI, for logging and display purposes.
host = poco_uri.getHost();
port = poco_uri.getPort();
user = poco_uri.getUserInfo();
if (size_t separator = user.find(':'); separator != std::string::npos)
{
user.resize(separator);
}
// Connect with URI.
Poco::MongoDB::Connection::SocketFactory socket_factory;
connection->connect(uri, socket_factory);
}
else
{
// Connect with host/port/user/etc.
connection->connect(host, port);
if (!user.empty())
{
#if POCO_VERSION >= 0x01070800
Poco::MongoDB::Database poco_db(db);
if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method))
throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
#else
authenticate(*connection, db, user, password);
#endif
}
}
}
MongoDBDictionarySource::MongoDBDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block_)
: MongoDBDictionarySource(
dict_struct_,
config.getString(config_prefix + ".host"),
config.getUInt(config_prefix + ".port"),
config.getString(config_prefix + ".user", ""),
config.getString(config_prefix + ".password", ""),
config.getString(config_prefix + ".method", ""),
config.getString(config_prefix + ".db", ""),
config.getString(config_prefix + ".collection"),
sample_block_)
{
}
MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & other)
: MongoDBDictionarySource{
other.dict_struct, other.host, other.port, other.user, other.password, other.method, other.db, other.collection, other.sample_block}
other.dict_struct, other.uri, other.host, other.port, other.user, other.password, other.method, other.db, other.collection, other.sample_block}
{
}
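
The URI branch above can be exercised standalone with nothing but Poco::URI; the connection string below is a hypothetical example:

#include <Poco/URI.h>
#include <iostream>
#include <string>

int main()
{
    Poco::URI uri("mongodb://reader:secret@mongo.example.com:27017/testdb");

    std::string db = uri.getPath();  /// "/testdb"
    if (!db.empty() && db[0] == '/')
        db.erase(0, 1);              /// "testdb" -- the database the cursor needs

    std::string user = uri.getUserInfo();  /// "reader:secret"
    if (size_t separator = user.find(':'); separator != std::string::npos)
        user.resize(separator);            /// keep only the user name, for logging

    std::cout << uri.getHost() << ':' << uri.getPort()
              << " db=" << db << " user=" << user << '\n';
}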

View File

@ -29,8 +29,10 @@ namespace ErrorCodes
/// Allows loading dictionaries from a MongoDB collection
class MongoDBDictionarySource final : public IDictionarySource
{
public:
MongoDBDictionarySource(
const DictionaryStructure & dict_struct_,
const std::string & uri_,
const std::string & host_,
UInt16 port_,
const std::string & user_,
@ -40,13 +42,6 @@ class MongoDBDictionarySource final : public IDictionarySource
const std::string & collection_,
const Block & sample_block_);
public:
MongoDBDictionarySource(
const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block);
MongoDBDictionarySource(const MongoDBDictionarySource & other);
~MongoDBDictionarySource() override;
@ -76,12 +71,13 @@ public:
private:
const DictionaryStructure dict_struct;
const std::string host;
const UInt16 port;
const std::string user;
const std::string uri;
std::string host;
UInt16 port;
std::string user;
const std::string password;
const std::string method;
const std::string db;
std::string db;
const std::string collection;
Block sample_block;

View File

@ -5,6 +5,7 @@
#include <Common/escapeForFileName.h>
#include <Common/quoteString.h>
#include <common/logger_useful.h>
#include <Interpreters/Context.h>
#include <set>

View File

@ -1,6 +1,5 @@
#pragma once
#include <Interpreters/Context.h>
#include <Disks/DiskFactory.h>
#include <Disks/IDisk.h>
@ -10,6 +9,7 @@
namespace DB
{
class Context;
class DiskSelector;
using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;

View File

@ -7,6 +7,11 @@
#include <Poco/Net/HTTPResponse.h>
#include <common/logger_useful.h>
namespace DB::ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB::S3
{
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
@ -30,13 +35,16 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
Aws::Client::ClientConfigurationPerRequest cfg;
try
{
/// It should be just empty GET / request.
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_1_1);
/// It should be just an empty GET request.
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
session->sendRequest(request);
Poco::Net::HTTPResponse response;
auto & response_body_stream = session->receiveResponse(response);
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
throw Exception("Proxy resolver returned not OK status: " + response.getReason(), ErrorCodes::BAD_ARGUMENTS);
String proxy_host;
/// Read proxy host as string from response body.
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);

View File

@ -6,7 +6,7 @@ namespace DB::S3
{
/**
* Proxy configuration where proxy host is obtained each time from specified endpoint.
* For each request to S3 it makes GET request to specified endpoint and reads proxy host from a response body.
* For each request to S3 it makes GET request to specified endpoint URL and reads proxy host from a response body.
* Specified scheme and port are added to the obtained proxy host to form the complete proxy URL.
*/
class ProxyResolverConfiguration : public ProxyConfiguration

View File

@ -37,13 +37,14 @@ namespace
void checkRemoveAccess(IDisk & disk) { disk.remove("test_acl"); }
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(const Poco::Util::AbstractConfiguration * proxy_resolver_config)
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config)
{
auto endpoint = Poco::URI(proxy_resolver_config->getString("endpoint"));
auto proxy_scheme = proxy_resolver_config->getString("proxy_scheme");
auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint"));
auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme");
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
auto proxy_port = proxy_resolver_config->getUInt("proxy_port");
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
LOG_DEBUG(
&Logger::get("DiskS3"), "Configured proxy resolver: " << endpoint.toString() << ", Scheme: " << proxy_scheme << ", Port: " << proxy_port);
@ -51,16 +52,17 @@ namespace
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
}
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(const Poco::Util::AbstractConfiguration * proxy_config)
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config)
{
std::vector<String> keys;
proxy_config->keys(keys);
proxy_config.keys(prefix, keys);
std::vector<Poco::URI> proxies;
for (const auto & key : keys)
if (startsWith(key, "uri"))
{
Poco::URI proxy_uri(proxy_config->getString(key));
Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key));
if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS);
@ -78,25 +80,23 @@ namespace
return nullptr;
}
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const Poco::Util::AbstractConfiguration * config)
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config)
{
if (!config->has("proxy"))
if (!config.has(prefix + ".proxy"))
return nullptr;
const auto * proxy_config = config->createView("proxy");
std::vector<String> config_keys;
proxy_config->keys(config_keys);
config.keys(prefix + ".proxy", config_keys);
if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver"))
{
if (resolver_configs > 1)
throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS);
return getProxyResolverConfiguration(proxy_config->createView("resolver"));
return getProxyResolverConfiguration(prefix + ".proxy.resolver", config);
}
return getProxyListConfiguration(proxy_config);
return getProxyListConfiguration(prefix + ".proxy", config);
}
}
@ -107,27 +107,25 @@ void registerDiskS3(DiskFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const Context & context) -> DiskPtr {
const auto * disk_config = config.createView(config_prefix);
Poco::File disk{context.getPath() + "disks/" + name};
disk.createDirectories();
Aws::Client::ClientConfiguration cfg;
S3::URI uri(Poco::URI(disk_config->getString("endpoint")));
S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint")));
if (uri.key.back() != '/')
throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
cfg.endpointOverride = uri.endpoint;
auto proxy_config = getProxyConfiguration(disk_config);
auto proxy_config = getProxyConfiguration(config_prefix, config);
if (proxy_config)
cfg.perRequestConfiguration = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
auto client = S3::ClientFactory::instance().create(
cfg,
disk_config->getString("access_key_id", ""),
disk_config->getString("secret_access_key", ""));
config.getString(config_prefix + ".access_key_id", ""),
config.getString(config_prefix + ".secret_access_key", ""));
String metadata_path = context.getPath() + "disks/" + name + "/";
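
The prefix-based accessors that replace createView() above can be tried standalone; Poco::Util::MapConfiguration stands in for the real server config, and the keys and URIs are hypothetical:

#include <Poco/AutoPtr.h>
#include <Poco/Util/MapConfiguration.h>
#include <iostream>

int main()
{
    Poco::AutoPtr<Poco::Util::MapConfiguration> config = new Poco::Util::MapConfiguration;
    config->setString("storage.proxy.uri", "http://proxy1.example:3128");
    config->setString("storage.proxy.uri2", "http://proxy2.example:3128");

    Poco::Util::AbstractConfiguration::Keys keys;
    config->keys("storage.proxy", keys);  /// enumerate the children under the prefix

    for (const auto & key : keys)
        if (key.rfind("uri", 0) == 0)     /// same check as startsWith(key, "uri") above
            std::cout << config->getString("storage.proxy." + key) << '\n';
}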

View File

@ -1110,6 +1110,8 @@ public:
std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> ||
std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>;
static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionConvertFromString>(); }
static FunctionPtr create() { return std::make_shared<FunctionConvertFromString>(); }
@ -1126,67 +1128,17 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2))
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) +
", should be 1 or 2. Second argument only makes sense for DateTime (time zone, optional) and Decimal (scale).",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!isStringOrFixedString(arguments[0].type))
{
if (this->getName().find("OrZero") != std::string::npos ||
this->getName().find("OrNull") != std::string::npos)
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() +
". Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
else
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 2)
{
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
{
if (!isString(arguments[1].type))
throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if constexpr (to_decimal)
{
if (!isInteger(arguments[1].type))
throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!arguments[1].column)
throw Exception("Second argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN);
}
else
{
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1. Second argument makes sense only for DateTime and Decimal.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
}
DataTypePtr res;
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
res = std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
else if constexpr (to_decimal)
if constexpr (to_datetime64)
{
UInt64 scale = extractToDecimalScale(arguments[1]);
validateFunctionArgumentTypes(*this, arguments,
FunctionArgumentDescriptors{{"string", isStringOrFixedString, nullptr, "String or FixedString"}},
// optional
FunctionArgumentDescriptors{
{"precision", isUInt8, isColumnConst, "const UInt8"},
{"timezone", isStringOrFixedString, isColumnConst, "const String or FixedString"},
});
if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>>)
res = createDecimal<DataTypeDecimal>(9, scale);
else if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>>)
res = createDecimal<DataTypeDecimal>(18, scale);
else if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
res = createDecimal<DataTypeDecimal>(38, scale);
if (!res)
throw Exception("Someting wrong with toDecimalNNOrZero() or toDecimalNNOrNull()", ErrorCodes::LOGICAL_ERROR);
}
else if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
{
UInt64 scale = DataTypeDateTime64::default_scale;
if (arguments.size() > 1)
scale = extractToDecimalScale(arguments[1]);
@ -1194,7 +1146,67 @@ public:
res = std::make_shared<DataTypeDateTime64>(scale, timezone);
}
else
res = std::make_shared<ToDataType>();
{
if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2))
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) +
", should be 1 or 2. Second argument only makes sense for DateTime (time zone, optional) and Decimal (scale).",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!isStringOrFixedString(arguments[0].type))
{
if (this->getName().find("OrZero") != std::string::npos ||
this->getName().find("OrNull") != std::string::npos)
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() +
". Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
else
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 2)
{
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
{
if (!isString(arguments[1].type))
throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if constexpr (to_decimal)
{
if (!isInteger(arguments[1].type))
throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!arguments[1].column)
throw Exception("Second argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN);
}
else
{
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1. Second argument makes sense only for DateTime and Decimal.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
}
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
res = std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
else if constexpr (to_decimal)
{
UInt64 scale = extractToDecimalScale(arguments[1]);
if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>>)
res = createDecimal<DataTypeDecimal>(9, scale);
else if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>>)
res = createDecimal<DataTypeDecimal>(18, scale);
else if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
res = createDecimal<DataTypeDecimal>(38, scale);
if (!res)
throw Exception("Someting wrong with toDecimalNNOrZero() or toDecimalNNOrNull()", ErrorCodes::LOGICAL_ERROR);
}
else
res = std::make_shared<ToDataType>();
}
if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
res = std::make_shared<DataTypeNullable>(res);
@ -1207,12 +1219,9 @@ public:
const IDataType * from_type = block.getByPosition(arguments[0]).type.get();
bool ok = true;
if constexpr (to_decimal || std::is_same_v<ToDataType, DataTypeDateTime64>)
if constexpr (to_decimal || to_datetime64)
{
if (arguments.size() != 2)
throw Exception{"Function " + getName() + " expects 2 arguments for Decimal.", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};
UInt32 scale = extractToDecimalScale(block.getByPosition(arguments[1]));
const UInt32 scale = assert_cast<const ToDataType &>(*removeNullable(block.getByPosition(result).type)).getScale();
if (checkAndGetDataType<DataTypeString>(from_type))
{
@ -1241,7 +1250,6 @@ public:
}
else
ok = false;
}
if (!ok)
@ -1252,7 +1260,6 @@ public:
}
};
/** Conversion to fixed string is implemented only for strings.
*/
class FunctionToFixedString : public IFunction

View File

@ -118,9 +118,7 @@ private:
writeChar(':', out);
writeIntText(location.line, out);
StringRef out_str = out.finish();
out_str.data = arena.insert(out_str.data, out_str.size);
return out_str;
return out.finish();
}
else
{

View File

@ -18,6 +18,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Context.h>
#include <string>
#include <memory>
@ -44,7 +45,7 @@ namespace
{
template <typename Polygon, typename PointInPolygonImpl>
ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, Polygon & polygon)
UInt8 callPointInPolygonImplWithPool(Float64 x, Float64 y, Polygon & polygon)
{
using Pool = ObjectPoolMap<PointInPolygonImpl, std::string>;
/// C++11 has thread-safe function-local statics on most modern compilers.
@ -63,19 +64,19 @@ ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, P
std::string serialized_polygon = serialize(polygon);
auto impl = known_polygons.get(serialized_polygon, factory);
return pointInPolygon(x, y, *impl);
return impl->contains(x, y);
}
template <typename Polygon, typename PointInPolygonImpl>
ColumnPtr callPointInPolygonImpl(const IColumn & x, const IColumn & y, Polygon & polygon)
UInt8 callPointInPolygonImpl(Float64 x, Float64 y, Polygon & polygon)
{
PointInPolygonImpl impl(polygon);
return pointInPolygon(x, y, impl);
return impl.contains(x, y);
}
}
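The pooled variant above exists because preprocessing a polygon (building its grid index) is expensive; instances are memoized under the serialized polygon, and the thread-safe function-local static mentioned in the comment is what makes the lazily created pool safe to share. A rough sketch of such a memoizing pool, assuming only the standard library (ObjectPoolSketch is a hypothetical stand-in for ObjectPoolMap):

#include <map>
#include <memory>
#include <mutex>
#include <string>

// Hypothetical memoizing pool: builds an expensive Impl once per key and
// hands out shared ownership afterwards.
template <typename Impl>
class ObjectPoolSketch
{
public:
    template <typename Factory>
    std::shared_ptr<Impl> get(const std::string & key, Factory && factory)
    {
        std::lock_guard<std::mutex> lock(mutex);
        auto it = cache.find(key);
        if (it == cache.end())
            it = cache.emplace(key, std::shared_ptr<Impl>(factory())).first;
        return it->second;
    }

private:
    std::mutex mutex;
    std::map<std::string, std::shared_ptr<Impl>> cache;
};

int main()
{
    ObjectPoolSketch<int> pool;
    auto a = pool.get("key", [] { return new int(7); });
    auto b = pool.get("key", [] { return new int(8); }); // factory not called again
    return (*a == 7 && a == b) ? 0 : 1;
}

A mutex-guarded std::map is the simplest correct choice here; the real pool can be fancier, but the contract is the same: one expensive build per distinct key.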
template <typename PointInPolygonImpl, bool use_object_pool>
template <typename PointInConstPolygonImpl, typename PointInNonConstPolygonImpl>
class FunctionPointInPolygon : public IFunction
{
public:
@ -91,7 +92,8 @@ public:
static FunctionPtr create(const Context & context)
{
return std::make_shared<FunctionPointInPolygon<PointInPolygonImpl, use_object_pool>>(context.getSettingsRef().validate_polygons);
return std::make_shared<FunctionPointInPolygon<PointInConstPolygonImpl, PointInNonConstPolygonImpl>>(
context.getSettingsRef().validate_polygons);
}
String getName() const override
@ -116,74 +118,192 @@ public:
throw Exception("Too few arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION);
}
auto get_message_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); };
for (size_t i = 1; i < arguments.size(); ++i)
auto validate_tuple = [this](size_t i, const DataTypeTuple * tuple)
{
const auto * array = checkAndGetDataType<DataTypeArray>(arguments[i].get());
if (array == nullptr && i != 1)
throw Exception(get_message_prefix(i) + " must be array of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * tuple = checkAndGetDataType<DataTypeTuple>(array ? array->getNestedType().get() : arguments[i].get());
if (tuple == nullptr)
throw Exception(get_message_prefix(i) + " must contains tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(getMessagePrefix(i) + " must contain a tuple", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const DataTypes & elements = tuple->getElements();
if (elements.size() != 2)
throw Exception(get_message_prefix(i) + " must have exactly two elements.", ErrorCodes::BAD_ARGUMENTS);
throw Exception(getMessagePrefix(i) + " must have exactly two elements", ErrorCodes::BAD_ARGUMENTS);
for (auto j : ext::range(0, elements.size()))
{
if (!isNativeNumber(elements[j]))
{
throw Exception(get_message_prefix(i) + " must contains numeric tuple at position " + toString(j + 1),
throw Exception(getMessagePrefix(i) + " must contain numeric tuple at position " + toString(j + 1),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
};
validate_tuple(0, checkAndGetDataType<DataTypeTuple>(arguments[0].get()));
if (arguments.size() == 2)
{
const auto * array = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (array == nullptr)
throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * nested_array = checkAndGetDataType<DataTypeArray>(array->getNestedType().get());
if (nested_array != nullptr)
{
array = nested_array;
}
validate_tuple(1, checkAndGetDataType<DataTypeTuple>(array->getNestedType().get()));
}
else
{
for (size_t i = 1; i < arguments.size(); i++)
{
const auto * array = checkAndGetDataType<DataTypeArray>(arguments[i].get());
if (array == nullptr)
throw Exception(getMessagePrefix(i) + " must contain an array of tuples",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
validate_tuple(i, checkAndGetDataType<DataTypeTuple>(array->getNestedType().get()));
}
}
return std::make_shared<DataTypeUInt8>();
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
const IColumn * point_col = block.getByPosition(arguments[0]).column.get();
const auto * const_tuple_col = checkAndGetColumn<ColumnConst>(point_col);
if (const_tuple_col)
point_col = &const_tuple_col->getDataColumn();
const auto * tuple_col = checkAndGetColumn<ColumnTuple>(point_col);
if (!tuple_col)
throw Exception("First argument for function " + getName() + " must be constant array of tuples.",
ErrorCodes::ILLEGAL_COLUMN);
auto & result_column = block.safeGetByPosition(result).column;
const auto & tuple_columns = tuple_col->getColumns();
result_column = executeForType(*tuple_columns[0], *tuple_columns[1], block, arguments);
if (const_tuple_col)
const IColumn * poly_col = block.getByPosition(arguments[1]).column.get();
const auto * const_poly_col = checkAndGetColumn<ColumnConst>(poly_col);
bool point_is_const = const_tuple_col != nullptr;
bool poly_is_const = const_poly_col != nullptr;
auto call_impl = poly_is_const
? callPointInPolygonImplWithPool<Polygon, PointInConstPolygonImpl>
: callPointInPolygonImpl<Polygon, PointInNonConstPolygonImpl>;
size_t size = point_is_const && poly_is_const ? 1 : input_rows_count;
auto execution_result = ColumnVector<UInt8>::create(size);
auto & data = execution_result->getData();
Polygon polygon;
for (auto i : ext::range(0, size))
{
if (!poly_is_const || i == 0)
{
polygon = parsePolygon(block, arguments, i);
}
size_t point_index = point_is_const ? 0 : i;
data[i] = call_impl(tuple_columns[0]->getFloat64(point_index), tuple_columns[1]->getFloat64(point_index), polygon);
}
auto & result_column = block.safeGetByPosition(result).column;
result_column = std::move(execution_result);
if (point_is_const && poly_is_const)
result_column = ColumnConst::create(result_column, const_tuple_col->size());
}
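When both the point and the polygon are constant, the loop above runs exactly once and the single result is rewrapped as a constant column (ColumnConst repeats the value without materializing it). A toy version of that fold, with hypothetical names and a plain vector standing in for the column:

#include <functional>
#include <vector>

// Hypothetical constant-folding helper: if every input is constant, evaluate
// once and replicate; otherwise evaluate per row.
std::vector<int> evaluate(bool inputs_are_const, size_t rows, const std::function<int(size_t)> & f)
{
    size_t size = inputs_are_const ? 1 : rows;
    std::vector<int> data(size);
    for (size_t i = 0; i < size; ++i)
        data[i] = f(i);
    if (inputs_are_const)
        data.assign(rows, data[0]); // materialized stand-in for the ColumnConst wrapper
    return data;
}

int main()
{
    auto rows = evaluate(true, 4, [](size_t) { return 1; });
    return rows.size() == 4 && rows[3] == 1 ? 0 : 1;
}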
private:
bool validate;
ColumnPtr executeForType(const IColumn & x, const IColumn & y, Block & block, const ColumnNumbers & arguments)
std::string getMessagePrefix(size_t i) const
{
return "Argument " + toString(i + 1) + " for function " + getName();
}
Polygon parsePolygonFromSingleColumn(Block & block, const ColumnNumbers & arguments, size_t i) const
{
const auto & poly = block.getByPosition(arguments[1]).column.get();
const auto * column_const = checkAndGetColumn<ColumnConst>(poly);
const auto * array_col =
column_const ? checkAndGetColumn<ColumnArray>(column_const->getDataColumn()) : checkAndGetColumn<ColumnArray>(poly);
if (!array_col)
throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples",
ErrorCodes::ILLEGAL_COLUMN);
const auto * nested_array_col = checkAndGetColumn<ColumnArray>(array_col->getData());
const auto & tuple_data = nested_array_col ? nested_array_col->getData() : array_col->getData();
const auto & tuple_col = checkAndGetColumn<ColumnTuple>(tuple_data);
if (!tuple_col)
throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples",
ErrorCodes::ILLEGAL_COLUMN);
const auto & tuple_columns = tuple_col->getColumns();
const auto & x_column = tuple_columns[0];
const auto & y_column = tuple_columns[1];
auto parse_polygon_part = [&x_column, &y_column](auto & container, size_t l, size_t r)
{
for (auto j : ext::range(l, r))
{
CoordinateType x_coord = x_column->getFloat64(j);
CoordinateType y_coord = y_column->getFloat64(j);
container.push_back(Point(x_coord, y_coord));
}
};
Polygon polygon;
if (nested_array_col)
{
for (auto j : ext::range(array_col->getOffsets()[i - 1], array_col->getOffsets()[i]))
{
size_t l = nested_array_col->getOffsets()[j - 1];
size_t r = nested_array_col->getOffsets()[j];
if (polygon.outer().empty())
{
parse_polygon_part(polygon.outer(), l, r);
}
else
{
polygon.inners().emplace_back();
parse_polygon_part(polygon.inners().back(), l, r);
}
}
}
else
{
size_t l = array_col->getOffsets()[i - 1];
size_t r = array_col->getOffsets()[i];
parse_polygon_part(polygon.outer(), l, r);
}
return polygon;
}
Polygon parsePolygonFromMultipleColumns(Block & block, const ColumnNumbers & arguments, size_t) const
{
Polygon polygon;
auto get_message_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); };
for (size_t i = 1; i < arguments.size(); ++i)
{
const auto * const_col = checkAndGetColumn<ColumnConst>(block.getByPosition(arguments[i]).column.get());
const auto * array_col = const_col ? checkAndGetColumn<ColumnArray>(&const_col->getDataColumn()) : nullptr;
if (!const_col)
throw Exception("Multi-argument version of function " + getName() + " works only with const polygon",
ErrorCodes::BAD_ARGUMENTS);
const auto * array_col = checkAndGetColumn<ColumnArray>(&const_col->getDataColumn());
const auto * tuple_col = array_col ? checkAndGetColumn<ColumnTuple>(&array_col->getData()) : nullptr;
if (!tuple_col)
throw Exception(get_message_prefix(i) + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN);
throw Exception(getMessagePrefix(i) + " must be constant array of tuples", ErrorCodes::ILLEGAL_COLUMN);
const auto & tuple_columns = tuple_col->getColumns();
const auto & column_x = tuple_columns[0];
@ -197,7 +317,7 @@ private:
auto size = column_x->size();
if (size == 0)
throw Exception(get_message_prefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN);
throw Exception(getMessagePrefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN);
for (auto j : ext::range(0, size))
{
@ -207,6 +327,21 @@ private:
}
}
return polygon;
}
Polygon parsePolygon(Block & block, const ColumnNumbers & arguments, size_t i) const
{
Polygon polygon;
if (arguments.size() == 2)
{
polygon = parsePolygonFromSingleColumn(block, arguments, i);
}
else
{
polygon = parsePolygonFromMultipleColumns(block, arguments, i);
}
boost::geometry::correct(polygon);
#if !defined(__clang_analyzer__) /// It does not like boost.
@ -218,19 +353,14 @@ private:
throw Exception("Polygon is not valid: " + failure_message, ErrorCodes::BAD_ARGUMENTS);
}
#endif
auto call_impl = use_object_pool
? callPointInPolygonImplWithPool<Polygon, PointInPolygonImpl>
: callPointInPolygonImpl<Polygon, PointInPolygonImpl>;
return call_impl(x, y, polygon);
return polygon;
}
};
void registerFunctionPointInPolygon(FunctionFactory & factory)
{
factory.registerFunction<FunctionPointInPolygon<PointInPolygonWithGrid<Float64>, true>>();
factory.registerFunction<FunctionPointInPolygon<PointInPolygonWithGrid<Float64>, PointInPolygonTrivial<Float64>>>();
}
}
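parsePolygonFromSingleColumn above relies on the ring layout of Boost.Geometry polygons: the first ring of an array-of-arrays argument fills polygon.outer(), each subsequent ring becomes a hole in polygon.inners(), and boost::geometry::correct then fixes ring orientation and closure. A standalone illustration using only Boost.Geometry (no ClickHouse types):

#include <boost/geometry.hpp>
#include <boost/geometry/geometries/point_xy.hpp>
#include <boost/geometry/geometries/polygon.hpp>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
    namespace bg = boost::geometry;
    using Point = bg::model::d2::point_xy<double>;
    using Polygon = bg::model::polygon<Point>;

    const std::vector<std::pair<double, double>> outer = {{0, 0}, {0, 4}, {4, 4}, {4, 0}};
    const std::vector<std::pair<double, double>> hole = {{1, 1}, {1, 3}, {3, 3}, {3, 1}};

    Polygon polygon;
    for (const auto & [x, y] : outer)
        polygon.outer().push_back(Point(x, y));         // first ring: outer boundary
    polygon.inners().emplace_back();
    for (const auto & [x, y] : hole)
        polygon.inners().back().push_back(Point(x, y)); // second ring: a hole

    bg::correct(polygon); // fix orientation/closure, as the code above does

    std::cout << bg::within(Point(0.5, 0.5), polygon) << '\n'; // 1: inside, outside the hole
    std::cout << bg::within(Point(2.0, 2.0), polygon) << '\n'; // 0: inside the hole
    return 0;
}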

View File

@ -14,6 +14,7 @@
#include <Poco/URI.h>
#include <Poco/Version.h>
#include <Common/DNSResolver.h>
#include <Common/RemoteHostFilter.h>
#include <common/logger_useful.h>
#include <Poco/URIStreamFactory.h>

View File

@ -562,8 +562,17 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf
{
time_t whole;
DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized since it could be missing from the input string
if (!parseDateTimeBestEffortImpl<bool>(whole, in, local_time_zone, utc_time_zone, &subsecond))
return ReturnType(false);
if constexpr (std::is_same_v<ReturnType, bool>)
{
if (!parseDateTimeBestEffortImpl<bool>(whole, in, local_time_zone, utc_time_zone, &subsecond))
return false;
}
else
{
parseDateTimeBestEffortImpl<ReturnType>(whole, in, local_time_zone, utc_time_zone, &subsecond);
}
DateTime64::NativeType fractional = subsecond.value;
if (scale < subsecond.digits)
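The if constexpr dispatch above follows the usual convention for these parsers: the same implementation either throws on malformed input (when ReturnType is void) or reports failure through its bool return value, which is what lets the OrNull/OrZero wrappers share one parser. A minimal sketch of the pattern under that assumption (parseDigitSketch is hypothetical):

#include <stdexcept>
#include <string_view>
#include <type_traits>

// One parser body, two error policies: void => throw, bool => return false.
template <typename ReturnType>
ReturnType parseDigitSketch(std::string_view s, int & out)
{
    constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
    if (s.empty() || s[0] < '0' || s[0] > '9')
    {
        if constexpr (throw_exception)
            throw std::runtime_error("cannot parse digit");
        else
            return false;
    }
    out = s[0] - '0';
    return ReturnType(true); // for void this is just `return void(true);`
}

int main()
{
    int v = 0;
    bool ok = parseDigitSketch<bool>("x", v); // false, no exception
    parseDigitSketch<void>("7", v);           // throws only on bad input
    return !ok && v == 7 ? 0 : 1;
}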

View File

@ -32,6 +32,7 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/misc.h>
#include <Interpreters/ActionsVisitor.h>

View File

@ -1,6 +1,7 @@
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/ExpressionJIT.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Context.h>
#include <Common/Exception.h>
#include <Common/setThreadName.h>
#include <Common/CurrentMetrics.h>

View File

@ -2,11 +2,14 @@
#include <Core/QueryProcessingStage.h>
#include <Interpreters/ClusterProxy/IStreamFactory.h>
#include <Interpreters/StorageID.h>
#include <Storages/IStorage_fwd.h>
namespace DB
{
using Scalars = std::map<String, Block>;
namespace ClusterProxy
{

View File

@ -17,6 +17,7 @@
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
#include <Interpreters/Context.h>
#include <Access/AccessRightsElement.h>
#include <Common/DNSResolver.h>
#include <Common/Macros.h>

View File

@ -1,5 +1,5 @@
#pragma once
#include <Interpreters/Context.h>
#include <Interpreters/Cluster.h>
#include <DataStreams/BlockIO.h>
#include <Common/CurrentThread.h>
@ -13,9 +13,15 @@
#include <mutex>
#include <thread>
namespace zkutil
{
class ZooKeeper;
}
namespace DB
{
class Context;
class ASTAlterQuery;
class AccessRightsElements;
struct DDLLogEntry;

View File

@ -4,6 +4,7 @@
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionJIT.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/Context.h>
#include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeArray.h>
@ -509,6 +510,33 @@ std::string ExpressionAction::toString() const
return ss.str();
}
ExpressionActions::ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_)
: input_columns(input_columns_), settings(context_.getSettingsRef())
{
for (const auto & input_elem : input_columns)
sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name));
#if USE_EMBEDDED_COMPILER
compilation_cache = context_.getCompiledExpressionCache();
#endif
}
/// For constant columns the columns themselves can be contained in `input_columns_`.
ExpressionActions::ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_)
: settings(context_.getSettingsRef())
{
for (const auto & input_elem : input_columns_)
{
input_columns.emplace_back(input_elem.name, input_elem.type);
sample_block.insert(input_elem);
}
#if USE_EMBEDDED_COMPILER
compilation_cache = context_.getCompiledExpressionCache();
#endif
}
ExpressionActions::~ExpressionActions() = default;
void ExpressionActions::checkLimits(Block & block) const
{
if (settings.max_temporary_columns && block.columns() > settings.max_temporary_columns)

View File

@ -4,7 +4,6 @@
#include <Core/ColumnWithTypeAndName.h>
#include <Core/Names.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Common/SipHash.h>
#include <Common/UInt128.h>
#include <unordered_map>
@ -25,6 +24,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
class Context;
class TableJoin;
class IJoin;
using JoinPtr = std::shared_ptr<IJoin>;
@ -42,6 +42,7 @@ class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
class ExpressionActions;
class CompiledExpressionCache;
/** Action on the block.
*/
@ -155,30 +156,12 @@ class ExpressionActions
public:
using Actions = std::vector<ExpressionAction>;
ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_)
: input_columns(input_columns_), settings(context_.getSettingsRef())
{
for (const auto & input_elem : input_columns)
sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name));
#if USE_EMBEDDED_COMPILER
compilation_cache = context_.getCompiledExpressionCache();
#endif
}
ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_);
/// For constant columns the columns themselves can be contained in `input_columns_`.
ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_)
: settings(context_.getSettingsRef())
{
for (const auto & input_elem : input_columns_)
{
input_columns.emplace_back(input_elem.name, input_elem.type);
sample_block.insert(input_elem);
}
#if USE_EMBEDDED_COMPILER
compilation_cache = context_.getCompiledExpressionCache();
#endif
}
ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_);
~ExpressionActions();
/// Add the input column.
/// The name of the column must not match the names of the intermediate columns that occur when evaluating the expression.
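The constructor bodies moving out of this header (see the ExpressionActions.cpp hunk above) let it drop Interpreters/Context.h in favor of the forward declaration, so translation units that include ExpressionActions.h no longer recompile whenever Context changes. A condensed single-file sketch of the pattern, with hypothetical names:

// --- widget.h (sketch): forward declaration only, no heavy include ---
class Context; // enough for references/pointers in declarations

class Widget
{
public:
    explicit Widget(const Context & context); // defined where Context is complete
    ~Widget();
private:
    const Context * context = nullptr;
};

// --- widget.cpp (sketch): the only place that needs the full type ---
class Context
{
public:
    int value = 42;
};

Widget::Widget(const Context & context_) : context(&context_) {}
Widget::~Widget() = default;

int main()
{
    Context ctx;
    Widget w(ctx);
    return 0;
}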

View File

@ -32,6 +32,7 @@
#include <Interpreters/HashJoin.h>
#include <Interpreters/MergeJoin.h>
#include <Interpreters/DictionaryReader.h>
#include <Interpreters/Context.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/parseAggregateFunctionParameters.h>

View File

@ -7,7 +7,6 @@
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/Context.h>
#include <Interpreters/interpretSubquery.h>
#include <Common/typeid_cast.h>
#include <Core/Block.h>
@ -17,6 +16,7 @@
#include <IO/WriteHelpers.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/Context.h>
namespace DB
{

View File

@ -7,6 +7,7 @@
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/Context.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/queryToString.h>

View File

@ -1,6 +1,5 @@
#pragma once
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
#include <Parsers/IAST_fwd.h>
@ -8,6 +7,8 @@
namespace DB
{
class Context;
/// Returns single row with explain results
class InterpreterExplainQuery : public IInterpreter
{

View File

@ -61,6 +61,7 @@
#include <Interpreters/InterpreterUseQuery.h>
#include <Interpreters/InterpreterWatchQuery.h>
#include <Interpreters/InterpreterGrantQuery.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTSystemQuery.h>

View File

@ -2,13 +2,14 @@
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/BlockIO.h>
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
#include <Parsers/ASTInsertQuery.h>
namespace DB
{
class Context;
/** Interprets the INSERT query.
*/

View File

@ -302,10 +302,11 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S
if (where_expression)
select_query += " WHERE " + queryToString(where_expression);
BlockIO block_io = executeQuery(select_query, context.getGlobalContext(), true, QueryProcessingStage::Complete, false, false);
Block res = block_io.in->read();
BlockIO block_io = executeQuery(select_query, context.getGlobalContext(), true);
auto stream = block_io.getInputStream();
Block res = stream->read();
if (res && block_io.in->read())
if (res && stream->read())
throw Exception("Expected one block from input stream", ErrorCodes::LOGICAL_ERROR);
return res;
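The rewritten call goes through the unified getInputStream() accessor but keeps the same contract: the internal SELECT must produce exactly one block, and a non-empty second read is a logic error. A toy sketch of that contract with hypothetical stream types:

#include <stdexcept>
#include <vector>

using Block = std::vector<int>; // stand-in for DB::Block; empty means "no more data"

struct StreamSketch
{
    std::vector<Block> blocks;
    size_t pos = 0;
    Block read() { return pos < blocks.size() ? blocks[pos++] : Block{}; }
};

Block readSingleBlock(StreamSketch & stream)
{
    Block res = stream.read();
    // Mirrors the check above: a non-empty second block is a logic error.
    if (!res.empty() && !stream.read().empty())
        throw std::logic_error("Expected one block from input stream");
    return res;
}

int main()
{
    StreamSketch one{{{1, 2, 3}}};
    return readSingleBlock(one).size() == 3 ? 0 : 1;
}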

View File

@ -5,7 +5,6 @@
#include <Core/QueryProcessingStage.h>
#include <Parsers/ASTSelectQuery.h>
#include <DataStreams/IBlockStream_fwd.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/IInterpreter.h>
@ -25,6 +24,7 @@ namespace DB
struct SubqueryForSet;
class InterpreterSelectWithUnionQuery;
class Context;
struct SyntaxAnalyzerResult;
using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;

View File

@ -1,5 +1,6 @@
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <DataStreams/UnionBlockInputStream.h>

View File

@ -1,15 +1,16 @@
#pragma once
#include <Core/QueryProcessingStage.h>
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Parsers/IAST_fwd.h>
#include <Processors/QueryPipeline.h>
namespace DB
{
class Context;
class InterpreterSelectQuery;

Some files were not shown because too many files have changed in this diff.