Merge branch 'master' into keeper-more-reduce

2024-09-20 00:30:49 +00:00 · 2024-02-07 14:21:31 +01:00 · 2024-02-07 14:21:31 +01:00 · 15c89bdd66
commit 15c89bdd66
parent 1279964866 7edcb86c08
164 changed files with 2609 additions and 741 deletions
--- a/.github/ISSUE_TEMPLATE/85_bug-report.md
+++ b/.github/ISSUE_TEMPLATE/85_bug-report.md
@ -17,7 +17,7 @@ assignees: ''

 > A link to reproducer in [https://fiddle.clickhouse.com/](https://fiddle.clickhouse.com/).

-**Does it reproduce on recent release?**
+**Does it reproduce on the most recent release?**

 [The list of releases](https://github.com/ClickHouse/ClickHouse/blob/master/utils/list-versions/version_date.tsv)

@ -34,11 +34,11 @@ assignees: ''
 **How to reproduce**

 * Which ClickHouse server version to use
-* Which interface to use, if matters
+* Which interface to use, if it matters
 * Non-default settings, if any
 * `CREATE TABLE` statements for all tables involved
 * Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
-* Queries to run that lead to unexpected result
+* Queries to run that lead to an unexpected result

 **Expected behavior**

--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@ -55,7 +55,6 @@ jobs:
    uses: ./.github/workflows/reusable_docker.yml
    with:
      data: ${{ needs.RunConfig.outputs.data }}
-      set_latest: true
  StyleCheck:
    needs: [RunConfig, BuildDockers]
    if: ${{ !failure() && !cancelled() }}
@ -362,14 +361,6 @@ jobs:
      test_name: Stateless tests (release)
      runner_type: func-tester
      data: ${{ needs.RunConfig.outputs.data }}
-  FunctionalStatelessTestReleaseDatabaseOrdinary:
-    needs: [RunConfig, BuilderDebRelease]
-    if: ${{ !failure() && !cancelled() }}
-    uses: ./.github/workflows/reusable_test.yml
-    with:
-      test_name: Stateless tests (release, DatabaseOrdinary)
-      runner_type: func-tester
-      data: ${{ needs.RunConfig.outputs.data }}
  FunctionalStatelessTestReleaseDatabaseReplicated:
    needs: [RunConfig, BuilderDebRelease]
    if: ${{ !failure() && !cancelled() }}
@ -733,7 +724,6 @@ jobs:
      - MarkReleaseReady
      - FunctionalStatelessTestDebug
      - FunctionalStatelessTestRelease
-      - FunctionalStatelessTestReleaseDatabaseOrdinary
      - FunctionalStatelessTestReleaseDatabaseReplicated
      - FunctionalStatelessTestReleaseAnalyzer
      - FunctionalStatelessTestReleaseS3
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@ -28,7 +28,7 @@ jobs:
        id: runconfig
        run: |
            echo "::group::configure CI run"
-            python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --skip-jobs --rebuild-all-docker --outfile ${{ runner.temp }}/ci_run_data.json
+            python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --skip-jobs --outfile ${{ runner.temp }}/ci_run_data.json
            echo "::endgroup::"

            echo "::group::CI run configure results"
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@ -785,6 +785,15 @@ jobs:
      test_name: Integration tests (release)
      runner_type: stress-tester
      data: ${{ needs.RunConfig.outputs.data }}
+  IntegrationTestsAarch64:
+    needs: [RunConfig, BuilderDebAarch64]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Integration tests (aarch64)
+      # FIXME: no stress-tester runner exists for aarch64, so func-tester-aarch64 is used here; confirm it is adequate.
+      runner_type: func-tester-aarch64
+      data: ${{ needs.RunConfig.outputs.data }}
  IntegrationTestsFlakyCheck:
    needs: [RunConfig, BuilderDebAsan]
    if: ${{ !failure() && !cancelled() }}
@ -924,6 +933,7 @@ jobs:
      - IntegrationTestsAnalyzerAsan
      - IntegrationTestsTsan
      - IntegrationTestsRelease
+      - IntegrationTestsAarch64
      - IntegrationTestsFlakyCheck
      - PerformanceComparisonX86
      - PerformanceComparisonAarch
--- a/.github/workflows/reusable_docker.yml
+++ b/.github/workflows/reusable_docker.yml
@ -46,7 +46,7 @@ jobs:
    needs: [DockerBuildAmd64, DockerBuildAarch64]
    runs-on: [self-hosted, style-checker]
    if: |
-      !failure() && !cancelled() && toJson(fromJson(inputs.data).docker_data.missing_multi) != '[]'
+      !failure() && !cancelled() && (toJson(fromJson(inputs.data).docker_data.missing_multi) != '[]' || inputs.set_latest)
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
@ -55,14 +55,12 @@ jobs:
      - name: Build images
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
+          FLAG_LATEST=''
          if [ "${{ inputs.set_latest }}" == "true" ]; then
+            FLAG_LATEST='--set-latest'
            echo "latest tag will be set for resulting manifests"
+          fi
          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 \
            --image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \
            --missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_multi) }}' \
-              --set-latest
-          else
-            python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 \
-              --image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \
-              --missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_multi) }}'
-          fi
+            $FLAG_LATEST
--- a/.gitmessage
+++ b/.gitmessage
@ -11,6 +11,7 @@
 ## To run specified set of tests in CI:
 #ci_set_<SET_NAME>
 #ci_set_reduced
+#ci_set_arm

 ## To run specified job in CI:
 #job_<JOB NAME>
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,8 +6,6 @@

 ### <a id="241"></a> ClickHouse release 24.1, 2024-01-30

-### ClickHouse release master (b4a5b6060ea) FIXME as compared to v23.12.1.1368-stable (a2faa65b080)
-
 #### Backward Incompatible Change
 * The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)).
@ -24,7 +22,6 @@
 * Add `quantileDD` aggregate function as well as the corresponding `quantilesDD` and `medianDD`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)).
 * Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)).
 * Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)).
-* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)).
 * Add function `arrayShingles` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)).
 * Added functions `punycodeEncode`, `punycodeDecode`, `idnaEncode` and `idnaDecode` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)).
 * Added string similarity functions `damerauLevenshteinDistance`, `jaroSimilarity` and `jaroWinklerSimilarity`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)).
--- a/README.md
+++ b/README.md
@ -37,7 +37,7 @@ Keep an eye out for upcoming meetups around the world. Somewhere else you want u

 ## Recent Recordings
 * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
-* **Recording available**: [**v23.10 Release Webinar**](https://www.youtube.com/watch?v=PGQS6uPb970) All the features of 23.10, one convenient video! Watch it now!
+* **Recording available**: [**v24.1 Release Webinar**](https://www.youtube.com/watch?v=pBF9g0wGAGs) All the features of 24.1, one convenient video! Watch it now!
 * **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)

  
--- a/base/base/CMakeLists.txt
+++ b/base/base/CMakeLists.txt
@ -17,6 +17,7 @@ set (SRCS
    getMemoryAmount.cpp
    getPageSize.cpp
    getThreadId.cpp
+    int8_to_string.cpp
    JSON.cpp
    mremap.cpp
    phdr_cache.cpp
--- a/base/base/bit_cast.h
+++ b/base/base/bit_cast.h
@ -1,5 +1,6 @@
 #pragma once

+#include <bit>
 #include <cstring>
 #include <algorithm>
 #include <type_traits>
--- a/base/base/getMemoryAmount.cpp
+++ b/base/base/getMemoryAmount.cpp
@ -1,8 +1,11 @@
-#include <stdexcept>
-#include <fstream>
 #include <base/getMemoryAmount.h>
+
 #include <base/getPageSize.h>

+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/param.h>
@ -11,6 +14,80 @@
 #endif


+namespace
+{
+/// Returns the memory limit in bytes read from the cgroups v2 "memory.max" file that applies to this process, or an empty optional if it cannot be determined (always empty on non-Linux builds).
+std::optional<uint64_t> getCgroupsV2MemoryLimit()
+{
+#if defined(OS_LINUX)
+    const std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup";
+
+    /// This file exists iff the host has cgroups v2 enabled.
+    std::ifstream controllers_file(default_cgroups_mount / "cgroup.controllers");
+    if (!controllers_file.is_open())
+        return {};
+
+    /// Make sure that the memory controller is enabled.
+    /// - cgroup.controllers defines which controllers *can* be enabled.
+    /// - cgroup.subtree_control defines which controllers *are* enabled.
+    /// (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
+    /// Caveat: nested groups may disable controllers. For simplicity, check only the top-level group.
+    /// ReadBufferFromFile subtree_control_file(default_cgroups_mount / "cgroup.subtree_control");
+    /// std::string subtree_control;
+    /// readString(subtree_control, subtree_control_file);
+    /// if (subtree_control.find("memory") == std::string::npos)
+    ///     return {};
+    std::ifstream subtree_control_file(default_cgroups_mount / "cgroup.subtree_control");
+    std::stringstream subtree_control_buf;
+    subtree_control_buf << subtree_control_file.rdbuf();
+    std::string subtree_control = subtree_control_buf.str();
+    if (subtree_control.find("memory") == std::string::npos)
+        return {};
+
+    /// Identify the cgroup the process belongs to.
+    /// All PIDs assigned to a cgroup are listed in /sys/fs/cgroup/{cgroup_name}/cgroup.procs.
+    /// A simpler way to get the membership is to read /proc/self/cgroup:
+    std::ifstream cgroup_name_file("/proc/self/cgroup");
+    if (!cgroup_name_file.is_open())
+        return {};
+
+    std::stringstream cgroup_name_buf;
+    cgroup_name_buf << cgroup_name_file.rdbuf();
+    std::string cgroup_name = cgroup_name_buf.str();
+    if (!cgroup_name.empty() && cgroup_name.back() == '\n')
+        cgroup_name.pop_back(); /// remove trailing newline, if any
+    /// With cgroups v2, there will be a *single* line with prefix "0::/"; any other content makes us give up.
+    const std::string v2_prefix = "0::/";
+    if (!cgroup_name.starts_with(v2_prefix))
+        return {};
+    cgroup_name = cgroup_name.substr(v2_prefix.length());
+
+    std::filesystem::path current_cgroup = cgroup_name.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup_name);
+
+    /// Open the bottom-most nested memory limit setting file. If there is no such file at the current
+    /// level, try again at the parent level as memory settings are inherited.
+    while (current_cgroup != default_cgroups_mount.parent_path())
+    {
+        std::ifstream setting_file(current_cgroup / "memory.max");
+        if (setting_file.is_open())
+        {
+            uint64_t value;
+            if (setting_file >> value)
+                return {value};
+            else
+                return {}; /// e.g. the cgroups default "max"
+        }
+        current_cgroup = current_cgroup.parent_path();
+    }
+
+    return {}; /// no memory.max file could be opened at any level up to the cgroups mount point
+#else
+    return {};
+#endif
+}
+
+}
+
 /** Returns the size of physical memory (RAM) in bytes.
  * Returns 0 on unsupported platform
  */
@ -26,34 +103,27 @@ uint64_t getMemoryAmountOrZero()

    uint64_t memory_amount = num_pages * page_size;

-#if defined(OS_LINUX)
-    // Try to lookup at the Cgroup limit
-
-    // CGroups v2
-    std::ifstream cgroupv2_limit("/sys/fs/cgroup/memory.max");
-    if (cgroupv2_limit.is_open())
-    {
-        uint64_t memory_limit = 0;
-        cgroupv2_limit >> memory_limit;
-        if (memory_limit > 0 && memory_limit < memory_amount)
-            memory_amount = memory_limit;
-    }
+    /// Respect the memory limit set by cgroups v2.
+    auto limit_v2 = getCgroupsV2MemoryLimit();
+    if (limit_v2.has_value() && *limit_v2 < memory_amount)
+         memory_amount = *limit_v2;
    else
    {
-        // CGroups v1
-        std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
-        if (cgroup_limit.is_open())
+        /// Cgroups v1 were replaced by v2 in 2015. The only reason we keep supporting v1 is that the transition to v2
+        /// has been slow. Caveat: Hierarchical groups as in v2 are not supported for v1, the location of the memory
+        /// limit (virtual) file is hard-coded.
+        /// TODO: check at the end of 2024 if we can get rid of v1.
+        std::ifstream limit_file_v1("/sys/fs/cgroup/memory/memory.limit_in_bytes");
+        if (limit_file_v1.is_open())
        {
-            uint64_t memory_limit = 0; // in case of read error
-            cgroup_limit >> memory_limit;
-            if (memory_limit > 0 && memory_limit < memory_amount)
-                memory_amount = memory_limit;
+            uint64_t limit_v1;
+            if (limit_file_v1 >> limit_v1)
+                if (limit_v1 < memory_amount)
+                    memory_amount = limit_v1;
        }
    }
-#endif

    return memory_amount;
-
 }


--- a/base/base/int8_to_string.cpp
+++ b/base/base/int8_to_string.cpp
@ -0,0 +1,9 @@
+#include <base/int8_to_string.h>
+/// Overload of std::to_string for Int8: converts the value to int8_t and forwards to an existing to_string overload.
+namespace std
+{
+std::string to_string(Int8 v) /// NOLINT (cert-dcl58-cpp)
+{
+    return to_string(int8_t{v});
+}
+}
--- a/base/base/int8_to_string.h
+++ b/base/base/int8_to_string.h
@ -0,0 +1,17 @@
+#pragma once
+
+#include <base/defines.h>
+#include <base/types.h>
+
+#include <fmt/format.h>
+/// Format Int8 with fmt exactly like int8_t by inheriting the int8_t formatter.
+template <>
+struct fmt::formatter<Int8> : fmt::formatter<int8_t>
+{
+};
+
+/// Declares a std::to_string overload taking Int8; the definition lives in base/base/int8_to_string.cpp.
+namespace std
+{
+std::string to_string(Int8 v); /// NOLINT (cert-dcl58-cpp)
+}
--- a/base/base/types.h
+++ b/base/base/types.h
@ -3,14 +3,29 @@
 #include <cstdint>
 #include <string>

-/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
+/// Using char8_t for more strict aliasing (https://stackoverflow.com/a/57453713)
 using UInt8 = char8_t;

+/// Same for using signed _BitInt(8) (there isn't a signed char8_t, which would be more convenient)
+/// See https://godbolt.org/z/fafnWEnnf
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wbit-int-extension"
+using Int8 = signed _BitInt(8);
+#pragma clang diagnostic pop
+/// std::hash specialization for Int8: hashes the value converted to int8_t with std::hash<int8_t>.
+namespace std
+{
+template <>
+struct hash<Int8> /// NOLINT (cert-dcl58-cpp)
+{
+    size_t operator()(const Int8 x) const { return std::hash<int8_t>()(int8_t{x}); }
+};
+}
+
 using UInt16 = uint16_t;
 using UInt32 = uint32_t;
 using UInt64 = uint64_t;

-using Int8 = int8_t;
 using Int16 = int16_t;
 using Int32 = int32_t;
 using Int64 = int64_t;
--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@ -6,6 +6,7 @@

 #include "throwError.h"

+#include <bit>
 #include <cmath>
 #include <cfloat>
 #include <cassert>
--- a/contrib/aws
+++ b/contrib/aws
@ -1 +1 @@
-Subproject commit 4ec215f3607c2111bf2cc91ba842046a6b5eb0c4
+Subproject commit 9eb5097a0abfa837722cca7a5114a25837817bf2
--- a/docker/images.json
+++ b/docker/images.json
@ -62,7 +62,6 @@
        "dependent": []
    },
    "docker/test/integration/runner": {
-        "only_amd64": true,
        "name": "clickhouse/integration-tests-runner",
        "dependent": []
    },
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -72,7 +72,7 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
        zstd \
        zip \
    && apt-get clean \
-    && rm -rf /var/lib/apt/lists
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 # Download toolchain and SDK for Darwin
 RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@ -23,10 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
        tzdata \
        wget \
    && apt-get clean \
-    && rm -rf \
-        /var/lib/apt/lists/* \
-        /var/cache/debconf \
-        /tmp/*
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@ -13,7 +13,10 @@ RUN apt-get update \
        zstd \
        locales \
        sudo \
-        --yes --no-install-recommends
+        --yes --no-install-recommends \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
+

 # Sanitizer options for services (clickhouse-server)
 # Set resident memory limit for TSAN to 45GiB (46080MiB) to avoid OOMs in Stress tests
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@ -20,7 +20,9 @@ RUN apt-get update \
        pv \
        jq \
        zstd \
-    --yes --no-install-recommends
+        --yes --no-install-recommends \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3

@ -31,12 +33,14 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
  && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
  && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
  && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
-  && rm -rf /tmp/clickhouse-odbc-tmp \
+  && rm -rf /tmp/clickhouse-odbc-tmp
+
+# Give suid to gdb to grant it attach permissions
+# chmod 777 to make the container user independent
+RUN chmod u+s /usr/bin/gdb \
  && mkdir -p /var/lib/clickhouse \
  && chmod 777 /var/lib/clickhouse

-# chmod 777 to make the container user independent
-
 ENV TZ=Europe/Amsterdam
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

--- a/docker/test/fuzzer/Dockerfile
+++ b/docker/test/fuzzer/Dockerfile
@ -29,7 +29,7 @@ RUN apt-get update \
            wget \
    && apt-get autoremove --yes \
    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 RUN pip3 install Jinja2

--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -389,8 +389,8 @@ fi
 rg --text -F '<Fatal>' server.log > fatal.log ||:
 dmesg -T > dmesg.log ||:

-zstd --threads=0 server.log
-zstd --threads=0 fuzzer.log
+zstd --threads=0 --rm server.log
+zstd --threads=0 --rm fuzzer.log

 cat > report.html <<EOF ||:
 <!DOCTYPE html>
--- a/docker/test/install/deb/Dockerfile
+++ b/docker/test/install/deb/Dockerfile
@ -10,13 +10,13 @@ ENV \
  init=/lib/systemd/systemd

 # install systemd packages
-RUN apt-get update && \
-  apt-get install -y --no-install-recommends \
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends \
    sudo \
    systemd \
-    && \
-  apt-get clean && \
-  rm -rf /var/lib/apt/lists
+    \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 # configure systemd
 # remove systemd 'wants' triggers
--- a/docker/test/integration/hive_server/Dockerfile
+++ b/docker/test/integration/hive_server/Dockerfile
@ -1,31 +1,27 @@
 FROM ubuntu:20.04
 MAINTAINER lgbo-ustc <lgbo.ustc@gmail.com>

-RUN apt-get update 
-RUN apt-get install -y wget openjdk-8-jre
-
-RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \
-        tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz
-RUN wget https://apache.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \
-        tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz
-RUN apt install -y vim
-
-RUN apt install -y openssh-server openssh-client
-
-RUN apt install -y mysql-server
-
-RUN mkdir -p /root/.ssh && \
-        ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa && \
-        cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys && \
-        cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key && \
-        cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub
-
-RUN wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz &&\
-        tar -xf mysql-connector-java-8.0.27.tar.gz && \
-        mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ && \
-        rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27
-
-RUN apt install -y iputils-ping net-tools
+RUN apt-get update \
+  && apt-get install -y wget openjdk-8-jre \
+  && wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz \
+  && tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz \
+  && wget https://apache.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz \
+  && tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz \
+  && apt install -y vim \
+  && apt install -y openssh-server openssh-client \
+  && apt install -y mysql-server \
+  && mkdir -p /root/.ssh \
+  && ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa \
+  && cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys \
+  && cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key \
+  && cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub \
+  && wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz \
+  && tar -xf mysql-connector-java-8.0.27.tar.gz \
+  && mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ \
+  && rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27 \
+  && apt install -y iputils-ping net-tools \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 ENV JAVA_HOME=/usr
 ENV HADOOP_HOME=/hadoop-3.1.0
@ -44,4 +40,3 @@ COPY demo_data.txt /
 ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH
 RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format
 COPY start.sh /
-
--- a/docker/test/integration/postgresql_java_client/Dockerfile
+++ b/docker/test/integration/postgresql_java_client/Dockerfile
@ -3,14 +3,10 @@

 FROM ubuntu:18.04

-RUN apt-get update && \
-    apt-get install -y software-properties-common build-essential openjdk-8-jdk curl
-
-RUN rm -rf \
-        /var/lib/apt/lists/* \
-        /var/cache/debconf \
-        /tmp/* \
-RUN apt-get clean
+RUN apt-get update \
+    && apt-get install -y software-properties-common build-essential openjdk-8-jdk curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 ARG ver=42.2.12
 RUN curl -L -o /postgresql-java-${ver}.jar https://repo1.maven.org/maven2/org/postgresql/postgresql/${ver}/postgresql-${ver}.jar
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@ -37,11 +37,8 @@ RUN apt-get update \
    libkrb5-dev \
    krb5-user \
    g++ \
-    && rm -rf \
-        /var/lib/apt/lists/* \
-        /var/cache/debconf \
-        /tmp/* \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 ENV TZ=Etc/UTC
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
@ -62,6 +59,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
    && dockerd --version; docker --version


+# kazoo 2.10.0 is broken
+# https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
 RUN python3 -m pip install --no-cache-dir \
    PyMySQL \
    aerospike==11.1.0 \
@ -70,7 +69,7 @@ RUN python3 -m pip install --no-cache-dir \
    azure-storage-blob \
    boto3 \
    cassandra-driver \
-    confluent-kafka==1.9.2 \
+    confluent-kafka==2.3.0 \
    delta-spark==2.3.0 \
    dict2xml \
    dicttoxml \
@ -79,7 +78,7 @@ RUN python3 -m pip install --no-cache-dir \
    grpcio \
    grpcio-tools \
    kafka-python \
-    kazoo \
+    kazoo==2.9.0 \
    lz4 \
    minio \
    nats-py \
--- a/docker/test/keeper-jepsen/Dockerfile
+++ b/docker/test/keeper-jepsen/Dockerfile
@ -24,7 +24,10 @@ RUN mkdir "/root/.ssh"
 RUN touch "/root/.ssh/known_hosts"

 # install java
-RUN apt-get update && apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends
+RUN apt-get update && \
+    apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 # install clojure
 RUN curl -O "https://download.clojure.org/install/linux-install-${CLOJURE_VERSION}.sh" && \
--- a/docker/test/libfuzzer/Dockerfile
+++ b/docker/test/libfuzzer/Dockerfile
@ -27,7 +27,7 @@ RUN apt-get update \
            wget \
    && apt-get autoremove --yes \
    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 RUN pip3 install Jinja2

--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@ -37,7 +37,7 @@ RUN apt-get update \
    && apt-get purge --yes python3-dev g++ \
    && apt-get autoremove --yes \
    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 COPY run.sh /

--- a/docker/test/server-jepsen/Dockerfile
+++ b/docker/test/server-jepsen/Dockerfile
@ -31,7 +31,9 @@ RUN mkdir "/root/.ssh"
 RUN touch "/root/.ssh/known_hosts"

 # install java
-RUN apt-get update && apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends
+RUN apt-get update && apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 # install clojure
 RUN curl -O "https://download.clojure.org/install/linux-install-${CLOJURE_VERSION}.sh" && \
--- a/docker/test/sqlancer/Dockerfile
+++ b/docker/test/sqlancer/Dockerfile
@ -5,9 +5,10 @@ FROM ubuntu:22.04
 ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

-RUN apt-get update --yes && \
-	env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends && \
-	apt-get clean
+RUN apt-get update --yes \
+    && env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends  \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 # We need to get the repository's HEAD each time despite, so we invalidate layers' cache
 ARG CACHE_INVALIDATOR=0
--- a/docker/test/sqllogic/Dockerfile
+++ b/docker/test/sqllogic/Dockerfile
@ -15,7 +15,8 @@ RUN apt-get update --yes \
            unixodbc-dev \
            odbcinst \
            sudo \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 RUN pip3 install \
    numpy \
--- a/docker/test/sqltest/Dockerfile
+++ b/docker/test/sqltest/Dockerfile
@ -11,7 +11,8 @@ RUN apt-get update --yes \
            python3-dev \
            python3-pip \
            sudo \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 RUN pip3 install \
    pyyaml \
--- a/docker/test/stateful/Dockerfile
+++ b/docker/test/stateful/Dockerfile
@ -9,7 +9,8 @@ RUN apt-get update -y \
        python3-requests \
        nodejs \
        npm \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 COPY create.sql /
 COPY run.sh /
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -44,7 +44,8 @@ RUN apt-get update -y \
            pv \
            zip \
            p7zip-full \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 RUN pip3 install numpy scipy pandas Jinja2 pyarrow

--- a/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile
+++ b/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile
@ -11,4 +11,6 @@ VOLUME /packages
 CMD apt-get update ;\
    DEBIAN_FRONTEND=noninteractive \
    apt install -y /packages/clickhouse-common-static_*.deb \
-		/packages/clickhouse-client_*.deb
+        /packages/clickhouse-client_*.deb \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
--- a/docker/test/stress/Dockerfile
+++ b/docker/test/stress/Dockerfile
@ -19,7 +19,8 @@ RUN apt-get update -y \
            openssl \
            netcat-openbsd \
            brotli \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 COPY run.sh /

--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@ -21,6 +21,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    locales \
    && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \
    && rm -rf /root/.cache/pip

 RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
--- a/docker/test/upgrade/Dockerfile
+++ b/docker/test/upgrade/Dockerfile
@ -19,7 +19,8 @@ RUN apt-get update -y \
            openssl \
            netcat-openbsd \
            brotli \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 COPY run.sh /

--- a/docker/test/util/Dockerfile
+++ b/docker/test/util/Dockerfile
@ -27,7 +27,9 @@ RUN apt-get update \
    && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
    && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
        /etc/apt/sources.list \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
+

 # Install cmake 3.20+ for rust support
 # Used https://askubuntu.com/a/1157132 as reference
@ -60,7 +62,9 @@ RUN apt-get update \
        software-properties-common \
        tzdata \
        --yes --no-install-recommends \
-    && apt-get clean
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
+

 # This symlink required by gcc to find lld compiler
 RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
--- a/docs/en/engines/table-engines/integrations/nats.md
+++ b/docs/en/engines/table-engines/integrations/nats.md
@ -38,6 +38,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
    [nats_username = 'user',]
    [nats_password = 'password',]
    [nats_token = 'clickhouse',]
+    [nats_credential_file = '/var/nats_credentials',]
    [nats_startup_connect_tries = '5']
    [nats_max_rows_per_message = 1,]
    [nats_handle_error_mode = 'default']
@ -63,6 +64,7 @@ Optional parameters:
 - `nats_username` - NATS username.
 - `nats_password` - NATS password.
 - `nats_token` - NATS auth token.
+- `nats_credential_file` - Path to a NATS credentials file.
 - `nats_startup_connect_tries` - Number of connect tries at startup. Default: `5`.
 - `nats_max_rows_per_message` — The maximum number of rows written in one NATS message for row-based formats. (default : `1`).
 - `nats_handle_error_mode` — How to handle errors for NATS engine. Possible values: default (the exception will be thrown if we fail to parse a message), stream (the exception message and raw message will be saved in virtual columns `_error` and `_raw_message`).
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -2097,7 +2097,7 @@ SELECT * FROM test_table

 ## update_insert_deduplication_token_in_dependent_materialized_views {#update-insert-deduplication-token-in-dependent-materialized-views}

-Allows to update `insert_deduplication_token` with table identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set.
+Allows to update `insert_deduplication_token` with view identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set.

 Possible values:

--- a/docs/en/sql-reference/data-types/variant.md
+++ b/docs/en/sql-reference/data-types/variant.md
@ -1,5 +1,5 @@
 ---
-slug: /en/sql-reference/data-types/json
+slug: /en/sql-reference/data-types/variant
 sidebar_position: 55
 sidebar_label: Variant
 ---
--- a/docs/en/sql-reference/functions/time-series-functions.md
+++ b/docs/en/sql-reference/functions/time-series-functions.md
@ -6,11 +6,67 @@ sidebar_label: Time Series

 # Time Series Functions

-Below functions are used for time series analysis.
+The functions below are used for series data analysis.
+
+## seriesOutliersDetectTukey
+
+Detects outliers in series data using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
+
+**Syntax**
+
+``` sql
+seriesOutliersDetectTukey(series);
+seriesOutliersDetectTukey(series, min_percentile, max_percentile, K);
+```
+
+**Arguments**
+
+- `series` - An array of numeric values.
+- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25.
+- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75.
+- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5.
+
+At least four data points are required in `series` to detect outliers.
+
+**Returned value**
+
+- Returns an array of the same length as the input array where each value represents the score of a possible anomaly for the corresponding element in the series. A non-zero score indicates a possible anomaly.
+
+Type: [Array](../../sql-reference/data-types/array.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0;
+```
+
+Result:
+
+``` text
+┌───────────print_0─────────────────┐
+│[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0] │
+└───────────────────────────────────┘
+```
+
+Query:
+
+``` sql
+SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0;
+```
+
+Result:
+
+``` text
+┌─print_0──────────────────────────────┐
+│ [0,0,0,0,0,0,0,0,0,19.5,0,0,0,0,0,0] │
+└──────────────────────────────────────┘
+```

 ## seriesPeriodDetectFFT

-Finds the period of the given time series data using FFT
+Finds the period of the given series data using FFT
 FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform)

 **Syntax**
@ -25,7 +81,7 @@ seriesPeriodDetectFFT(series);

 **Returned value**

- A real value equal to the period of time series
+- A real value equal to the period of series data
 - Returns NAN when number of data points are less than four.

 Type: [Float64](../../sql-reference/data-types/float.md).
@ -60,7 +116,7 @@ Result:

 ## seriesDecomposeSTL

-Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component. 
+Decomposes series data using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component.

 **Syntax**

--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@ -139,8 +139,8 @@ ALTER TABLE visits COMMENT COLUMN browser 'This column shows the browser used fo
 ## MODIFY COLUMN

 ``` sql
-MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
-ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
+MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [settings] [AFTER name_after | FIRST]
+ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [settings] [AFTER name_after | FIRST]
 ```

 This query changes the `name` column properties:
@ -153,10 +153,14 @@ This query changes the `name` column properties:

 - TTL

+- Column-level Settings
+
 For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs).

 For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).

+For examples of column-level settings modifying, see [Column-level Settings](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#column-level-settings).
+
 If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist.

 When changing the type, values are converted as if the [toType](/docs/en/sql-reference/functions/type-conversion-functions.md) functions were applied to them. If only the default expression is changed, the query does not do anything complex, and is completed almost instantly.
@ -209,7 +213,7 @@ The `ALTER` query for changing columns is replicated. The instructions are saved

 ## MODIFY COLUMN REMOVE

-Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTING`.
+Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTINGS`.

 Syntax:

@ -237,7 +241,7 @@ Modify a column setting.
 Syntax:

 ```sql
-ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING name=value,...;
+ALTER TABLE table_name MODIFY COLUMN column_name MODIFY SETTING name=value,...;
 ```

 **Example**
@ -245,7 +249,7 @@ ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING name=value,...;
 Modify column's `max_compress_block_size` to `1MB`:

 ```sql
-ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING max_compress_block_size = 1048576;
+ALTER TABLE table_name MODIFY COLUMN column_name MODIFY SETTING max_compress_block_size = 1048576;
 ```

 ## MODIFY COLUMN RESET SETTING
@ -255,15 +259,15 @@ Reset a column setting, also removes the setting declaration in the column expre
 Syntax:

 ```sql
-ALTER TABLE table_name MODIFY COLUMN RESET SETTING name,...;
+ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING name,...;
 ```

 **Example**

-Remove column setting `max_compress_block_size` to `1MB`:
+Reset column setting `max_compress_block_size` to its default value:

 ```sql
-ALTER TABLE table_name MODIFY COLUMN REMOVE SETTING max_compress_block_size;
+ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING max_compress_block_size;
 ```

 ## MATERIALIZE COLUMN
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -504,7 +504,7 @@ void Client::connect()
                        << "It may lack support for new features." << std::endl
                        << std::endl;
        }
-        else if (client_version_tuple > server_version_tuple)
+        else if (client_version_tuple > server_version_tuple && server_display_name != "clickhouse-cloud")
        {
            std::cout << "ClickHouse server version is older than ClickHouse client. "
                        << "It may indicate that the server is out of date and can be upgraded." << std::endl
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -826,6 +826,11 @@ try
        0, // We don't need any threads one all the parts will be deleted
        server_settings.max_parts_cleaning_thread_pool_size);

+    getDatabaseReplicatedCreateTablesThreadPool().initialize(
+        server_settings.max_database_replicated_create_table_thread_pool_size,
+        0, // We don't need any threads once all the tables will be created
+        server_settings.max_database_replicated_create_table_thread_pool_size);
+
    /// Initialize global local cache for remote filesystem.
    if (config().has("local_cache_for_remote_fs"))
    {
--- a/programs/server/dashboard.html
+++ b/programs/server/dashboard.html
@ -1380,11 +1380,13 @@ document.getElementById('params').onsubmit = function(event) {
    event.preventDefault();
 }

+const decodeState = (x) => JSON.parse(LZString.decompressFromEncodedURIComponent(x) || atob(x));
+const encodeState = (x) => LZString.compressToEncodedURIComponent(JSON.stringify(x));

 function saveState() {
    const state = { host, user, queries, params, search_query, customized };
    history.pushState(state, '',
-        window.location.pathname + (window.location.search || '') + '#' + LZString.compressToEncodedURIComponent(JSON.stringify(state)));
+        window.location.pathname + (window.location.search || '') + '#' + encodeState(state));
 }

 async function searchQueries() {
@ -1450,12 +1452,7 @@ window.onpopstate = function(event) {
 if (window.location.hash) {
    try {
        let search_query_, customized_;
-        try {
-            ({host, user, queries, params, search_query_, customized_} = JSON.parse(LZString.decompressFromEncodedURIComponent(window.location.hash.substring(1))));
-        } catch {
-            // For compatibility with uncompressed state
-            ({host, user, queries, params, search_query_, customized_} = JSON.parse(atob(window.location.hash.substring(1))));
-        }
+        ({host, user, queries, params, search_query_, customized_} = decodeState(window.location.hash.substring(1)));

        // For compatibility with old URLs' hashes
        search_query = search_query_ !== undefined ? search_query_ : search_query;
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@ -82,7 +82,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
    }

-    constexpr int nan_direction_hint = 1;
+    constexpr int nan_null_direction_hint = -1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
@ -95,7 +95,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(

        for (size_t i = index + 1; i < row_end; i++)
        {
-            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
                index = i;
        }
        this->data(place).changeIfGreater(column, index, arena);
@ -111,7 +111,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
            size_t index = row_begin;
            for (size_t i = index + 1; i < row_end; i++)
            {
-                if (column.compareAt(i, index, column, nan_direction_hint) > 0)
+                if (column.compareAt(i, index, column, nan_null_direction_hint) > 0)
                    index = i;
            }
            this->data(place).changeIfGreater(column, index, arena);
@ -122,7 +122,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
            IColumn::Permutation permutation;
            constexpr UInt64 limit = 1;
-            column.getPermutation(direction, stability, limit, nan_direction_hint, permutation);
+            column.getPermutation(direction, stability, limit, nan_null_direction_hint, permutation);
            this->data(place).changeIfGreater(column, permutation[0], arena);
        }
    }
@ -177,7 +177,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
    }

-    constexpr int nan_direction_hint = 1;
+    constexpr int nan_null_direction_hint = -1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
@ -190,7 +190,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(

        for (size_t i = index + 1; i < row_end; i++)
        {
-            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
                index = i;
        }
        this->data(place).changeIfGreater(column, index, arena);
@ -205,7 +205,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(

        for (size_t i = index + 1; i < row_end; i++)
        {
-            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
                index = i;
        }
        this->data(place).changeIfGreater(column, index, arena);
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@ -83,7 +83,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
    }

-    constexpr int nan_direction_hint = 1;
+    constexpr int nan_null_direction_hint = 1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
@ -96,7 +96,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(

        for (size_t i = index + 1; i < row_end; i++)
        {
-            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
                index = i;
        }
        this->data(place).changeIfLess(column, index, arena);
@ -112,7 +112,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
            size_t index = row_begin;
            for (size_t i = index + 1; i < row_end; i++)
            {
-                if (column.compareAt(i, index, column, nan_direction_hint) < 0)
+                if (column.compareAt(i, index, column, nan_null_direction_hint) < 0)
                    index = i;
            }
            this->data(place).changeIfLess(column, index, arena);
@ -123,7 +123,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
            IColumn::Permutation permutation;
            constexpr UInt64 limit = 1;
-            column.getPermutation(direction, stability, limit, nan_direction_hint, permutation);
+            column.getPermutation(direction, stability, limit, nan_null_direction_hint, permutation);
            this->data(place).changeIfLess(column, permutation[0], arena);
        }
    }
@ -178,7 +178,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
    }

-    constexpr int nan_direction_hint = 1;
+    constexpr int nan_null_direction_hint = 1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
@ -191,7 +191,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(

        for (size_t i = index + 1; i < row_end; i++)
        {
-            if ((if_flags[i] != 0) && (null_map[index] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+            /// Check the NULL flag of the candidate row `i` (not of the current best `index`),
+            /// so that a NULL row can never be picked as the minimum.
+            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
                index = i;
        }
        this->data(place).changeIfLess(column, index, arena);
@ -206,7 +206,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(

        for (size_t i = index + 1; i < row_end; i++)
        {
-            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
                index = i;
        }
        this->data(place).changeIfLess(column, index, arena);
--- a/src/AggregateFunctions/AggregateFunctionTopK.cpp
+++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp
@ -234,6 +234,9 @@ public:

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
+        if (!this->data(rhs).value.size())
+            return;
+
        auto & set = this->data(place).value;
        if (set.capacity() != reserved)
            set.resize(reserved);
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -424,6 +424,7 @@ struct TableExpressionData
    bool should_qualify_columns = true;
    NamesAndTypes column_names_and_types;
    ColumnNameToColumnNodeMap column_name_to_column_node;
+    std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns
    std::unordered_set<std::string, StringTransparentHash, std::equal_to<>> column_identifier_first_parts;

    bool hasFullIdentifierName(IdentifierView identifier_view) const
@ -1306,6 +1307,12 @@ private:
        const QueryTreeNodePtr & table_expression_node,
        IdentifierResolveScope & scope);

+    QueryTreeNodePtr matchArrayJoinSubcolumns(
+        const QueryTreeNodePtr & array_join_column_inner_expression,
+        const ColumnNode & array_join_column_expression_typed,
+        const QueryTreeNodePtr & resolved_expression,
+        IdentifierResolveScope & scope);
+
    QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
        const QueryTreeNodePtr & table_expression_node,
        IdentifierResolveScope & scope);
@ -2759,7 +2766,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier
    {
        if (identifier_lookup.isExpressionLookup())
        {
-            return tryResolveIdentifierFromCompoundExpression(identifier_lookup.identifier, 1 /*identifier_bind_size*/, it->second, {}, scope);
+            return tryResolveIdentifierFromCompoundExpression(
+                identifier_lookup.identifier,
+                1 /*identifier_bind_size*/,
+                it->second,
+                {} /* compound_expression_source */,
+                scope,
+                identifier_resolve_settings.allow_to_check_join_tree /* can_be_not_found */);
        }
        else if (identifier_lookup.isFunctionLookup() || identifier_lookup.isTableExpressionLookup())
        {
@ -2913,8 +2926,23 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage(
    QueryTreeNodePtr result_expression;
    bool match_full_identifier = false;

-    auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.getFullName());
-    if (it != table_expression_data.column_name_to_column_node.end())
+    const auto & identifier_full_name = identifier_without_column_qualifier.getFullName();
+    auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name);
+    bool can_resolve_directly_from_storage = it != table_expression_data.column_name_to_column_node.end();
+    if (can_resolve_directly_from_storage && table_expression_data.subcolumn_names.contains(identifier_full_name))
+    {
+        /** In the case when we have an ARRAY JOIN, we should not resolve subcolumns directly from storage.
+          * For example, consider the following SQL query:
+          * SELECT ProfileEvents.Values FROM system.query_log ARRAY JOIN ProfileEvents
+          * In this case, ProfileEvents.Values should also be array joined, not directly resolved from storage.
+          */
+        auto * nearest_query_scope = scope.getNearestQueryScope();
+        auto * nearest_query_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;
+        if (nearest_query_scope_query_node && nearest_query_scope_query_node->getJoinTree()->getNodeType() == QueryTreeNodeType::ARRAY_JOIN)
+            can_resolve_directly_from_storage = false;
+    }
+
+    if (can_resolve_directly_from_storage)
    {
        match_full_identifier = true;
        result_expression = it->second;
@ -3397,6 +3425,68 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
    return resolved_identifier;
 }

+/** Tries to rewrite a resolved `getSubcolumn(column, 'path')` expression so that it reads from the array joined column.
+  *
+  * Returns an empty pointer when `resolved_expression` is not a `getSubcolumn` function, when it does not have
+  * exactly two arguments, when its first argument differs from the column used in the ARRAY JOIN expression,
+  * or when its subcolumn path does not start with the array joined subcolumn prefix.
+  * Otherwise returns a newly resolved `getSubcolumn` function over the array join column.
+  *
+  * Throws LOGICAL_ERROR if the second argument of the resolved `getSubcolumn` is not a constant string.
+  */
+QueryTreeNodePtr QueryAnalyzer::matchArrayJoinSubcolumns(
+    const QueryTreeNodePtr & array_join_column_inner_expression,
+    const ColumnNode & array_join_column_expression_typed,
+    const QueryTreeNodePtr & resolved_expression,
+    IdentifierResolveScope & scope)
+{
+    const auto * resolved_function = resolved_expression->as<FunctionNode>();
+    if (!resolved_function || resolved_function->getFunctionName() != "getSubcolumn")
+        return {};
+
+    const auto * array_join_parent_column = array_join_column_inner_expression.get();
+
+    /** If both resolved and array-joined expressions are subcolumns, try to match them:
+      * For example, in `SELECT t.map.values FROM (SELECT * FROM tbl) ARRAY JOIN t.map`
+      * Identifier `t.map.values` is resolved into `getSubcolumn(t, 'map.values')` and t.map is resolved into `getSubcolumn(t, 'map')`
+      * Since we need to perform array join on `getSubcolumn(t, 'map')`, `t.map.values` should become `getSubcolumn(getSubcolumn(t, 'map'), 'values')`
+      *
+      * Note: It doesn't work when subcolumn in ARRAY JOIN is transformed by another expression, for example
+      * SELECT c.map, c.map.values FROM (SELECT * FROM tbl) ARRAY JOIN mapApply(x -> x, t.map);
+      */
+    String array_join_subcolumn_prefix;
+    auto * array_join_column_inner_expression_function = array_join_column_inner_expression->as<FunctionNode>();
+    if (array_join_column_inner_expression_function &&
+        array_join_column_inner_expression_function->getFunctionName() == "getSubcolumn")
+    {
+        const auto & argument_nodes = array_join_column_inner_expression_function->getArguments().getNodes();
+        if (argument_nodes.size() == 2 && argument_nodes.at(1)->getNodeType() == QueryTreeNodeType::CONSTANT)
+        {
+            const auto & constant_node = argument_nodes.at(1)->as<ConstantNode &>();
+            const auto & constant_node_value = constant_node.getValue();
+            if (constant_node_value.getType() == Field::Types::String)
+            {
+                array_join_subcolumn_prefix = constant_node_value.get<String>() + ".";
+                array_join_parent_column = argument_nodes.at(0).get();
+            }
+        }
+    }
+
+    /// Reject the expression if it has an unexpected number of arguments OR reads from another column.
+    /// The size check goes first so that `at(0)` is evaluated only when the argument exists.
+    const auto & argument_nodes = resolved_function->getArguments().getNodes();
+    if (argument_nodes.size() != 2 || !array_join_parent_column->isEqual(*argument_nodes.at(0)))
+        return {};
+
+    const auto * second_argument = argument_nodes.at(1)->as<ConstantNode>();
+    if (!second_argument || second_argument->getValue().getType() != Field::Types::String)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected constant string as second argument of getSubcolumn function {}", resolved_function->dumpTree());
+
+    const auto & resolved_subcolumn_path = second_argument->getValue().get<String &>();
+    if (!startsWith(resolved_subcolumn_path, array_join_subcolumn_prefix))
+        return {};
+
+    /// Build `getSubcolumn(array_join_column, 'path without the array joined prefix')`.
+    auto get_subcolumn_function = std::make_shared<FunctionNode>("getSubcolumn");
+    get_subcolumn_function->getArguments().getNodes().push_back(
+        std::make_shared<ColumnNode>(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()));
+    get_subcolumn_function->getArguments().getNodes().push_back(
+        std::make_shared<ConstantNode>(resolved_subcolumn_path.substr(array_join_subcolumn_prefix.size())));
+
+    QueryTreeNodePtr function_query_node = get_subcolumn_function;
+    resolveFunction(function_query_node, scope);
+
+    return function_query_node;
+}
+
 QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
    const QueryTreeNodePtr & table_expression_node,
    IdentifierResolveScope & scope)
@ -3465,8 +3555,12 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(con
                array_join_column_expression_typed.getColumnSource());
            break;
        }
-    }

+        /// When we select subcolumn of array joined column it also should be array joined
+        array_join_resolved_expression = matchArrayJoinSubcolumns(array_join_column_inner_expression, array_join_column_expression_typed, resolved_expression, scope);
+        if (array_join_resolved_expression)
+            break;
+    }
    return array_join_resolved_expression;
 }

@ -5570,6 +5664,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
                column = function_base->getConstantResultForNonConstArguments(argument_columns, result_type);
            }

+            if (column && column->getDataType() != result_type->getColumnType())
+                throw Exception(
+                    ErrorCodes::LOGICAL_ERROR,
+                    "Unexpected return type from {}. Expected {}. Got {}",
+                    function->getName(),
+                    result_type->getColumnType(),
+                    column->getDataType());
+
            /** Do not perform constant folding if there are aggregate or arrayJoin functions inside function.
              * Example: SELECT toTypeName(sum(number)) FROM numbers(10);
              */
@ -6427,6 +6529,8 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
          */
        for (const auto & column_name_and_type : table_expression_data.column_names_and_types)
        {
+            for (const auto & subcolumn : columns_description.getSubcolumns(column_name_and_type.name))
+                table_expression_data.subcolumn_names.insert(subcolumn.name);
            const auto & column_default = columns_description.getDefault(column_name_and_type.name);

            if (column_default && column_default->kind == ColumnDefaultKind::Alias)
--- a/src/Client/QueryFuzzer.cpp
+++ b/src/Client/QueryFuzzer.cpp
@ -903,6 +903,68 @@ void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
        remove_fuzzed_table(insert->getTable());
 }

+/// Randomly wraps a literal that is a direct child of an expression list into conversion functions.
+/// A String literal may first be replaced by `toFixedString(value, length)`; after that each of
+/// `toNullable`, `toLowCardinality` and `materialize` is applied when its own `fuzz_rand() % 7 == 0` draw hits.
+/// `child` is expected to be an ASTLiteral (checked with chassert).
+ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child)
+{
+    auto * literal = child->as<ASTLiteral>();
+    chassert(literal);
+
+    /// The random draw happens only for String literals (short-circuit evaluation).
+    if (literal->value.getType() == Field::Types::Which::String && fuzz_rand() % 7 == 0)
+    {
+        String text = literal->value.get<String>();
+        auto text_literal = std::make_shared<ASTLiteral>(text);
+        auto length_literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(text.size()));
+        child = makeASTFunction("toFixedString", text_literal, length_literal);
+    }
+
+    /// Wrappers are applied in this fixed order, one random draw per wrapper.
+    for (const char * wrapper_name : {"toNullable", "toLowCardinality", "materialize"})
+    {
+        if (fuzz_rand() % 7 == 0)
+            child = makeASTFunction(wrapper_name, child);
+    }
+
+    return child;
+}
+
+/// Tries to strip one of the single-argument wrappers (`toNullable`, `toLowCardinality`, `materialize`)
+/// that fuzzLiteralUnderExpressionList may have added. The wrapper is stripped even if its argument
+/// is not a literal. Returns the wrapped argument when `fuzz_rand() % 7 == 0`, otherwise nullptr.
+ASTPtr QueryFuzzer::reverseLiteralFuzzing(ASTPtr child)
+{
+    const auto * wrapper = child->as<ASTFunction>();
+    if (!wrapper)
+        return nullptr;
+
+    const std::unordered_set<String> revertible_functions{"toNullable", "toLowCardinality", "materialize"};
+    if (!revertible_functions.contains(wrapper->name) || wrapper->children.size() != 1)
+        return nullptr;
+
+    /// The random draw is made only for wrappers that can actually be reverted.
+    if (fuzz_rand() % 7 != 0)
+        return nullptr;
+
+    return wrapper->children[0];
+}
+
+
+/// Fuzzes every child of an expression list in place.
+/// Literals are wrapped by fuzzLiteralUnderExpressionList when `fuzz_rand() % 13 == 0`;
+/// any other child is either unwrapped by reverseLiteralFuzzing or, if that returns nothing,
+/// passed to the generic fuzz().
+void QueryFuzzer::fuzzExpressionList(ASTExpressionList & expr_list)
+{
+    for (auto & element : expr_list.children)
+    {
+        if (typeid_cast<ASTLiteral *>(element.get()) != nullptr)
+        {
+            if (fuzz_rand() % 13 == 0)
+                element = fuzzLiteralUnderExpressionList(element);
+            continue;
+        }
+
+        if (auto unwrapped = reverseLiteralFuzzing(element))
+            element = unwrapped;
+        else
+            fuzz(element);
+    }
+}
+
 void QueryFuzzer::fuzz(ASTs & asts)
 {
    for (auto & ast : asts)
@ -989,7 +1051,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
    }
    else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
    {
-        fuzz(expr_list->children);
+        fuzzExpressionList(*expr_list);
    }
    else if (auto * order_by_element = typeid_cast<ASTOrderByElement *>(ast.get()))
    {
@ -1108,7 +1170,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
    }
    /*
     * The time to fuzz the settings has not yet come.
-     * Apparently we don't have any infractructure to validate the values of
+     * Apparently we don't have any infrastructure to validate the values of
     * the settings, and the first query with max_block_size = -1 breaks
     * because of overflows here and there.
     *//*
@ -1131,9 +1193,8 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
        // are ASTPtr -- this is redundant ownership, but hides the error if the
        // child field is replaced. Others can be ASTLiteral * or the like, which
        // leads to segfault if the pointed-to AST is replaced.
-        // Replacing children is safe in case of ASTExpressionList. In a more
-        // general case, we can change the value of ASTLiteral, which is what we
-        // do here.
+        // Replacing children is safe in case of ASTExpressionList (done in fuzzExpressionList). In a more
+        // general case, we can change the value of ASTLiteral, which is what we do here
        if (fuzz_rand() % 11 == 0)
        {
            literal->value = fuzzField(literal->value);
--- a/src/Client/QueryFuzzer.h
+++ b/src/Client/QueryFuzzer.h
@ -95,6 +95,9 @@ struct QueryFuzzer
    void fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind);
    void fuzzColumnDeclaration(ASTColumnDeclaration & column);
    void fuzzTableName(ASTTableExpression & table);
+    ASTPtr fuzzLiteralUnderExpressionList(ASTPtr child);
+    ASTPtr reverseLiteralFuzzing(ASTPtr child);
+    void fuzzExpressionList(ASTExpressionList & expr_list);
    void fuzz(ASTs & asts);
    void fuzz(ASTPtr & ast);
    void collectFuzzInfoMain(ASTPtr ast);
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@ -178,6 +178,9 @@
    M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \
    M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \
    M(MergeTreePartsCleanerThreadsScheduled, "Number of queued or active jobs in the MergeTree parts cleaner thread pool.") \
+    M(DatabaseReplicatedCreateTablesThreads, "Number of threads in the threadpool for table creation in DatabaseReplicated.") \
+    M(DatabaseReplicatedCreateTablesThreadsActive, "Number of active threads in the threadpool for table creation in DatabaseReplicated.") \
+    M(DatabaseReplicatedCreateTablesThreadsScheduled, "Number of queued or active jobs in the threadpool for table creation in DatabaseReplicated.") \
    M(IDiskCopierThreads, "Number of threads for copying data between disks of different types.") \
    M(IDiskCopierThreadsActive, "Number of threads for copying data between disks of different types running a task.") \
    M(IDiskCopierThreadsScheduled, "Number of queued or active jobs for copying data between disks of different types.") \
--- a/src/Common/ElapsedTimeProfileEventIncrement.h
+++ b/src/Common/ElapsedTimeProfileEventIncrement.h
@ -14,12 +14,13 @@ enum Time
    Seconds,
 };

-template <Time time>
+template <Time unit>
 struct ProfileEventTimeIncrement
 {
    explicit ProfileEventTimeIncrement<time>(ProfileEvents::Event event_)
        : event(event_), watch(CLOCK_MONOTONIC) {}

+    template <Time time = unit>
    UInt64 elapsed()
    {
        if constexpr (time == Time::Nanoseconds)
--- a/src/Common/Exception.h
+++ b/src/Common/Exception.h
@ -1,6 +1,7 @@
 #pragma once

 #include <cerrno>
+#include <exception>
 #include <vector>
 #include <memory>

@ -8,10 +9,11 @@

 #include <base/defines.h>
 #include <base/errnoToString.h>
+#include <base/int8_to_string.h>
 #include <base/scope_guard.h>
-#include <Common/LoggingFormatStringHelpers.h>
-#include <Common/Logger.h>
 #include <Common/AtomicLogger.h>
+#include <Common/Logger.h>
+#include <Common/LoggingFormatStringHelpers.h>
 #include <Common/StackTrace.h>

 #include <fmt/format.h>
--- a/src/Common/MultiVersion.h
+++ b/src/Common/MultiVersion.h
@ -2,6 +2,7 @@

 #include <atomic>
 #include <memory>
+#include <mutex>
 #include <base/defines.h>


@ -20,6 +21,9 @@
  * }   // now we finish own current version; if the version is outdated and no one else is using it - it will be destroyed.
  *
  * All methods are thread-safe.
+  *
+  * Standard library does not have atomic_shared_ptr, and we do not use std::atomic* operations on shared_ptr,
+  * because standard library implementation uses fixed table of mutexes, and it is better to avoid contention here.
  */
 template <typename T>
 class MultiVersion
@ -42,25 +46,37 @@ public:
    MultiVersion & operator=(MultiVersion && src)
    {
        if (this != &src)
-            std::atomic_store(&current_version, std::atomic_exchange(&src.current_version, Version{}));
+        {
+            Version version;
+
+            {
+                std::lock_guard<std::mutex> lock(src.mutex);
+                src.current_version.swap(version);
+            }
+
+            std::lock_guard<std::mutex> lock(mutex);
+            current_version = std::move(version);
+        }
+
        return *this;
    }

    /// Obtain current version for read-only usage. Returns shared_ptr, that manages lifetime of version.
    Version get() const
    {
-        return std::atomic_load(&current_version);
+        std::lock_guard<std::mutex> lock(mutex);
+        return current_version;
    }

-    /// TODO: replace atomic_load/store() on shared_ptr (which is deprecated as of C++20) by C++20 std::atomic<std::shared_ptr>.
-    /// Clang 15 currently does not support it.
-
    /// Update an object with new version.
    void set(std::unique_ptr<const T> && value)
    {
-        std::atomic_store(&current_version, Version{std::move(value)});
+        Version version{std::move(value)};
+        std::lock_guard<std::mutex> lock(mutex);
+        current_version = std::move(version);
    }

 private:
+    mutable std::mutex mutex;
    Version current_version;
 };
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -197,6 +197,15 @@
    M(MergeTreeDataWriterBlocks, "Number of blocks INSERTed to MergeTree tables. Each block forms a data part of level zero.") \
    M(MergeTreeDataWriterBlocksAlreadySorted, "Number of blocks INSERTed to MergeTree tables that appeared to be already sorted.") \
    \
+    M(MergeTreeDataWriterSkipIndicesCalculationMicroseconds, "Time spent calculating skip indices") \
+    M(MergeTreeDataWriterStatisticsCalculationMicroseconds, "Time spent calculating statistics") \
+    M(MergeTreeDataWriterSortingBlocksMicroseconds, "Time spent sorting blocks") \
+    M(MergeTreeDataWriterMergingBlocksMicroseconds, "Time spent merging input blocks (for special MergeTree engines)") \
+    M(MergeTreeDataWriterProjectionsCalculationMicroseconds, "Time spent calculating projections") \
+    M(MergeTreeDataProjectionWriterSortingBlocksMicroseconds, "Time spent sorting blocks (for projection it might be a key different from table's sorting key)") \
+    M(MergeTreeDataProjectionWriterMergingBlocksMicroseconds, "Time spent merging blocks") \
+    M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections") \
+    \
    M(InsertedWideParts, "Number of parts inserted in Wide format.") \
    M(InsertedCompactParts, "Number of parts inserted in Compact format.") \
    M(MergedIntoWideParts, "Number of parts merged into Wide format.") \
--- a/src/Common/SpaceSaving.h
+++ b/src/Common/SpaceSaving.h
@ -206,6 +206,9 @@ public:
     */
    void merge(const Self & rhs)
    {
+        if (!rhs.size())
+            return;
+
        UInt64 m1 = 0;
        UInt64 m2 = 0;

--- a/src/Common/getNumberOfPhysicalCPUCores.cpp
+++ b/src/Common/getNumberOfPhysicalCPUCores.cpp
@ -1,7 +1,6 @@
 #include "getNumberOfPhysicalCPUCores.h"
 #include <filesystem>

-#include "config.h"
 #if defined(OS_LINUX)
 #    include <cmath>
 #    include <fstream>
@ -34,9 +33,9 @@ int32_t readFrom(const std::filesystem::path & filename, int default_value)
 uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
 {
    uint32_t quota_count = default_cpu_count;
-    std::filesystem::path prefix = "/sys/fs/cgroup";
+    std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup";
    /// cgroupsv2
-    std::ifstream contr_file(prefix / "cgroup.controllers");
+    std::ifstream contr_file(default_cgroups_mount / "cgroup.controllers");
    if (contr_file.is_open())
    {
        /// First, we identify the cgroup the process belongs
@ -51,16 +50,15 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)

        std::filesystem::path current_cgroup;
        if (cgroup_name.empty())
-            current_cgroup = prefix;
+            current_cgroup = default_cgroups_mount;
        else
-            current_cgroup = prefix / cgroup_name;
+            current_cgroup = default_cgroups_mount / cgroup_name;

        // Looking for cpu.max in directories from the current cgroup to the top level
        // It does not stop on the first time since the child could have a greater value than parent
-        while (current_cgroup != prefix.parent_path())
+        while (current_cgroup != default_cgroups_mount.parent_path())
        {
            std::ifstream cpu_max_file(current_cgroup / "cpu.max");
-            current_cgroup = current_cgroup.parent_path();
            if (cpu_max_file.is_open())
            {
                std::string cpu_limit_str;
@ -72,10 +70,11 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
                    quota_count = std::min(static_cast<uint32_t>(ceil(cpu_limit / cpu_period)), quota_count);
                }
            }
+            current_cgroup = current_cgroup.parent_path();
        }
-        current_cgroup = prefix / cgroup_name;
+        current_cgroup = default_cgroups_mount / cgroup_name;
        // Looking for cpuset.cpus.effective in directories from the current cgroup to the top level
-        while (current_cgroup != prefix.parent_path())
+        while (current_cgroup != default_cgroups_mount.parent_path())
        {
            std::ifstream cpuset_cpus_file(current_cgroup / "cpuset.cpus.effective");
            current_cgroup = current_cgroup.parent_path();
@ -113,8 +112,8 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
    /// cgroupsv1
    /// Return the number of milliseconds per period process is guaranteed to run.
    /// -1 for no quota
-    int cgroup_quota = readFrom(prefix / "cpu/cpu.cfs_quota_us", -1);
-    int cgroup_period = readFrom(prefix / "cpu/cpu.cfs_period_us", -1);
+    int cgroup_quota = readFrom(default_cgroups_mount / "cpu/cpu.cfs_quota_us", -1);
+    int cgroup_period = readFrom(default_cgroups_mount / "cpu/cpu.cfs_period_us", -1);
    if (cgroup_quota > -1 && cgroup_period > 0)
        quota_count = static_cast<uint32_t>(ceil(static_cast<float>(cgroup_quota) / static_cast<float>(cgroup_period)));

@ -178,24 +177,25 @@ catch (...)

 unsigned getNumberOfPhysicalCPUCoresImpl()
 {
-    unsigned cpu_count = std::thread::hardware_concurrency(); /// logical cores (with SMT/HyperThreading)
+    unsigned cores = std::thread::hardware_concurrency(); /// logical cores (with SMT/HyperThreading)

+
+#if defined(__x86_64__) && defined(OS_LINUX)
    /// Most x86_64 CPUs have 2-way SMT (Hyper-Threading).
    /// Aarch64 and RISC-V don't have SMT so far.
    /// POWER has SMT and it can be multi-way (e.g. 8-way), but we don't know how ClickHouse really behaves, so use all of them.
-
-#if defined(__x86_64__) && defined(OS_LINUX)
+    ///
    /// On really big machines, SMT is detrimental to performance (+ ~5% overhead in ClickBench). On such machines, we limit ourself to the physical cores.
    /// Few cores indicate it is a small machine, runs in a VM or is a limited cloud instance --> it is reasonable to use all the cores.
-    if (cpu_count >= 32)
-        cpu_count = physical_concurrency();
+    if (cores >= 32)
+        cores = physical_concurrency();
 #endif

 #if defined(OS_LINUX)
-    cpu_count = getCGroupLimitedCPUCores(cpu_count);
+    cores = getCGroupLimitedCPUCores(cores);
 #endif

-    return cpu_count;
+    return cores;
 }

 }
@ -203,6 +203,6 @@ unsigned getNumberOfPhysicalCPUCoresImpl()
 unsigned getNumberOfPhysicalCPUCores()
 {
    /// Calculate once.
-    static auto res = getNumberOfPhysicalCPUCoresImpl();
-    return res;
+    static auto cores = getNumberOfPhysicalCPUCoresImpl();
+    return cores;
 }
--- a/src/Core/Field.h
+++ b/src/Core/Field.h
@ -216,9 +216,8 @@ using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
 template <> struct NearestFieldTypeImpl<char> { using Type = std::conditional_t<is_signed_v<char>, Int64, UInt64>; };
 template <> struct NearestFieldTypeImpl<signed char> { using Type = Int64; };
 template <> struct NearestFieldTypeImpl<unsigned char> { using Type = UInt64; };
-#ifdef __cpp_char8_t
 template <> struct NearestFieldTypeImpl<char8_t> { using Type = UInt64; };
-#endif
+template <> struct NearestFieldTypeImpl<Int8> { using Type = Int64; };

 template <> struct NearestFieldTypeImpl<UInt16> { using Type = UInt64; };
 template <> struct NearestFieldTypeImpl<UInt32> { using Type = UInt64; };
@ -306,7 +305,6 @@ static constexpr auto DBMS_MIN_FIELD_SIZE = 32;
  */
 class Field
 {
-    static constexpr int nan_direction_hint = 1; // When comparing Floats NaN are considered to be larger than all numbers
 public:
    struct Types
    {
@ -511,6 +509,7 @@ public:
            case Types::IPv4:    return get<IPv4>()    < rhs.get<IPv4>();
            case Types::IPv6:    return get<IPv6>()    < rhs.get<IPv6>();
            case Types::Float64:
+                static constexpr int nan_direction_hint = 1; /// Put NaN at the end
                return FloatCompareHelper<Float64>::less(get<Float64>(), rhs.get<Float64>(), nan_direction_hint);
            case Types::String:  return get<String>()  < rhs.get<String>();
            case Types::Array:   return get<Array>()   < rhs.get<Array>();
@ -555,6 +554,7 @@ public:
            case Types::IPv6:    return get<IPv6>()    <= rhs.get<IPv6>();
            case Types::Float64:
            {
+                static constexpr int nan_direction_hint = 1; /// Put NaN at the end
                Float64 f1 = get<Float64>();
                Float64 f2 = get<Float64>();
                return FloatCompareHelper<Float64>::less(f1, f2, nan_direction_hint)
@ -595,6 +595,7 @@ public:
            case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
            case Types::Int64:   return get<Int64>() == rhs.get<Int64>();
            case Types::Float64:
+                static constexpr int nan_direction_hint = 1; /// Put NaN at the end
                return FloatCompareHelper<Float64>::equals(get<Float64>(), rhs.get<Float64>(), nan_direction_hint);
            case Types::UUID:    return get<UUID>()    == rhs.get<UUID>();
            case Types::IPv4:    return get<IPv4>()    == rhs.get<IPv4>();
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@ -114,6 +114,7 @@ namespace DB
    M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
    M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
    M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \
+    M(UInt64, max_database_replicated_create_table_thread_pool_size, 0, "The number of threads to create tables during replica recovery in DatabaseReplicated. Value less than two means tables will be created sequentially.", 0) \

    /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -559,7 +559,7 @@ class IColumn;
    M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
    \
    M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
-    M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
+    M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
    M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
    M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
    M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
@ -748,7 +748,7 @@ class IColumn;
    M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
    M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
    M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \
-    M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \
+    M(UInt64, async_insert_max_data_size, 10485760, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \
    M(UInt64, async_insert_max_query_number, 450, "Maximum number of insert queries before being inserted", 0) \
    M(Milliseconds, async_insert_poll_timeout_ms, 10, "Timeout for polling data from asynchronous insert queue", 0) \
    M(Bool, async_insert_use_adaptive_busy_timeout, true, "If it is set to true, use adaptive busy timeout for asynchronous inserts", 0) \
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@ -84,7 +84,8 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
-    {"24.2", {{"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"},
+    {"24.2", {{"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."},
+              {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"},
              {"async_insert_use_adaptive_busy_timeout", true, true, "Use adaptive asynchronous insert timeout"},
              {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"},
              {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"},
@ -108,6 +109,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
              {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"},
              {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"},
              {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"},
+              {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
              {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"},
              {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"},
              {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}}},
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@ -29,6 +29,7 @@
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
+#include <IO/SharedThreadPools.h>
 #include <Parsers/ASTAlterQuery.h>
 #include <Parsers/ASTDropQuery.h>
 #include <Parsers/ASTFunction.h>
@ -1091,9 +1092,18 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
    }

    tables_dependencies.checkNoCyclicDependencies();
-    auto tables_to_create = tables_dependencies.getTablesSortedByDependency();

+    auto allow_concurrent_table_creation = getContext()->getServerSettings().max_database_replicated_create_table_thread_pool_size > 1;
+    auto tables_to_create_by_level = tables_dependencies.getTablesSortedByDependencyWithLevels();
+
+    auto create_tables_runner = threadPoolCallbackRunner<void>(getDatabaseReplicatedCreateTablesThreadPool().get(), "CreateTables");
+    std::vector<std::future<void>> create_table_futures;
+
+    for (const auto & [_, tables_to_create] : tables_to_create_by_level)
+    {
        for (const auto & table_id : tables_to_create)
+        {
+            auto task = [&]()
            {
                auto table_name = table_id.getTableName();
                auto metadata_it = table_name_to_metadata.find(table_name);
@ -1102,20 +1112,37 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
                    /// getTablesSortedByDependency() may return some not existing tables or tables from other databases
                    LOG_WARNING(log, "Got table name {} when resolving table dependencies, "
                                "but database {} does not have metadata for that table. Ignoring it", table_id.getNameForLogs(), getDatabaseName());
-            continue;
+                    return;
                }

                const auto & create_query_string = metadata_it->second;
                if (isTableExist(table_name, getContext()))
                {
                    assert(create_query_string == readMetadataFile(table_name) || getTableUUIDIfReplicated(create_query_string, getContext()) != UUIDHelpers::Nil);
-            continue;
+                    return;
                }

                auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_query_string);
                LOG_INFO(log, "Executing {}", serializeAST(*query_ast));
                auto create_query_context = make_query_context();
                InterpreterCreateQuery(query_ast, create_query_context).execute();
+            };
+
+            if (allow_concurrent_table_creation)
+                create_table_futures.push_back(create_tables_runner(task, Priority{0}));
+            else
+                task();
+        }
+
+        /// First wait for all tasks to finish.
+        for (auto & future : create_table_futures)
+            future.wait();
+
+        /// Now rethrow the first exception if any.
+        for (auto & future : create_table_futures)
+            future.get();
+
+        create_table_futures.clear();
    }
    LOG_INFO(log, "All tables are created successfully");

--- a/src/Databases/TablesDependencyGraph.cpp
+++ b/src/Databases/TablesDependencyGraph.cpp
@ -699,6 +699,17 @@ std::vector<StorageID> TablesDependencyGraph::getTablesSortedByDependency() cons
 }


+/// Groups the storage IDs of all nodes by their dependency level.
+/// Nodes are visited in the order returned by getNodesSortedByLevel(), and each one is appended
+/// to the list stored under its `level` key.
+std::map<size_t, std::vector<StorageID>> TablesDependencyGraph::getTablesSortedByDependencyWithLevels() const
+{
+    std::map<size_t, std::vector<StorageID>> level_to_tables;
+
+    for (const auto * node : getNodesSortedByLevel())
+    {
+        /// operator[] creates the list for a level on first use.
+        auto & tables_on_level = level_to_tables[node->level];
+        tables_on_level.emplace_back(node->storage_id);
+    }
+
+    return level_to_tables;
+}
+
+
 void TablesDependencyGraph::log() const
 {
    if (nodes.empty())
--- a/src/Databases/TablesDependencyGraph.h
+++ b/src/Databases/TablesDependencyGraph.h
@ -107,6 +107,12 @@ public:
    /// tables which depend on the tables which depend on the tables without dependencies, and so on.
    std::vector<StorageID> getTablesSortedByDependency() const;

+    /// Returns a map of lists of tables by the number of dependencies they have:
+    /// tables without dependencies first with level 0, then
+    /// tables which depend on the tables without dependencies with level 1, then
+    /// tables which depend on the tables which depend on the tables without dependencies with level 2, and so on.
+    std::map<size_t, std::vector<StorageID>> getTablesSortedByDependencyWithLevels() const;
+
    /// Outputs information about this graph as a bunch of logging messages.
    void log() const;

--- a/src/Functions/array/arrayElement.cpp
+++ b/src/Functions/array/arrayElement.cpp
@ -670,8 +670,7 @@ struct ArrayElementStringImpl
        ColumnArray::Offset current_offset = 0;
        /// get the total result bytes at first, and reduce the cost of result_data.resize.
        size_t total_result_bytes = 0;
-        ColumnString::Chars zero_buf(1);
-        zero_buf.push_back(0);
+        ColumnString::Chars zero_buf(16, '\0'); /// Needs 15 extra bytes for memcpySmallAllowReadWriteOverflow15
        std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
        selected_bufs.reserve(size);
        for (size_t i = 0; i < size; ++i)
@ -737,8 +736,7 @@ struct ArrayElementStringImpl
        size_t size = offsets.size();
        result_offsets.resize(size);

-        ColumnString::Chars zero_buf(1);
-        zero_buf.push_back(0);
+        ColumnString::Chars zero_buf(16, '\0'); /// Needs 15 extra bytes for memcpySmallAllowReadWriteOverflow15
        ColumnArray::Offset current_offset = 0;
        /// get the total result bytes at first, and reduce the cost of result_data.resize.
        size_t total_result_bytes = 0;
--- a/src/Functions/divide/divide.cpp
+++ b/src/Functions/divide/divide.cpp
@ -49,9 +49,9 @@ template void divideImpl<uint32_t, char8_t, uint32_t>(const uint32_t * __restric
 template void divideImpl<int64_t, int64_t, int64_t>(const int64_t * __restrict, int64_t, int64_t * __restrict, size_t);
 template void divideImpl<int64_t, int32_t, int64_t>(const int64_t * __restrict, int32_t, int64_t * __restrict, size_t);
 template void divideImpl<int64_t, int16_t, int64_t>(const int64_t * __restrict, int16_t, int64_t * __restrict, size_t);
-template void divideImpl<int64_t, int8_t, int64_t>(const int64_t * __restrict, int8_t, int64_t * __restrict, size_t);
+template void divideImpl<int64_t, Int8, int64_t>(const int64_t * __restrict, Int8, int64_t * __restrict, size_t);

 template void divideImpl<int32_t, int64_t, int32_t>(const int32_t * __restrict, int64_t, int32_t * __restrict, size_t);
 template void divideImpl<int32_t, int32_t, int32_t>(const int32_t * __restrict, int32_t, int32_t * __restrict, size_t);
 template void divideImpl<int32_t, int16_t, int32_t>(const int32_t * __restrict, int16_t, int32_t * __restrict, size_t);
-template void divideImpl<int32_t, int8_t, int32_t>(const int32_t * __restrict, int8_t, int32_t * __restrict, size_t);
+template void divideImpl<int32_t, Int8, int32_t>(const int32_t * __restrict, Int8, int32_t * __restrict, size_t);
--- a/src/Functions/divide/divideImpl.cpp
+++ b/src/Functions/divide/divideImpl.cpp
@ -12,6 +12,10 @@

 #include <libdivide.h>

+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wbit-int-extension"
+using Int8 = signed _BitInt(8);
+#pragma clang diagnostic pop

 namespace NAMESPACE
 {
@ -62,11 +66,11 @@ template void divideImpl<uint32_t, char8_t, uint32_t>(const uint32_t * __restric
 template void divideImpl<int64_t, int64_t, int64_t>(const int64_t * __restrict, int64_t, int64_t * __restrict, size_t);
 template void divideImpl<int64_t, int32_t, int64_t>(const int64_t * __restrict, int32_t, int64_t * __restrict, size_t);
 template void divideImpl<int64_t, int16_t, int64_t>(const int64_t * __restrict, int16_t, int64_t * __restrict, size_t);
-template void divideImpl<int64_t, int8_t, int64_t>(const int64_t * __restrict, int8_t, int64_t * __restrict, size_t);
+template void divideImpl<int64_t, Int8, int64_t>(const int64_t * __restrict, Int8, int64_t * __restrict, size_t);

 template void divideImpl<int32_t, int64_t, int32_t>(const int32_t * __restrict, int64_t, int32_t * __restrict, size_t);
 template void divideImpl<int32_t, int32_t, int32_t>(const int32_t * __restrict, int32_t, int32_t * __restrict, size_t);
 template void divideImpl<int32_t, int16_t, int32_t>(const int32_t * __restrict, int16_t, int32_t * __restrict, size_t);
-template void divideImpl<int32_t, int8_t, int32_t>(const int32_t * __restrict, int8_t, int32_t * __restrict, size_t);
+template void divideImpl<int32_t, Int8, int32_t>(const int32_t * __restrict, Int8, int32_t * __restrict, size_t);

 }
--- a/src/Functions/if.cpp
+++ b/src/Functions/if.cpp
@ -1,32 +1,34 @@
-#include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypesDecimal.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnConst.h>
+#include <Columns/ColumnDecimal.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnMap.h>
+#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnVariant.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/MaskOperations.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeFixedString.h>
-#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeMap.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypeVariant.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/NumberTraits.h>
 #include <DataTypes/getLeastSupertype.h>
-#include <Columns/ColumnVector.h>
-#include <Columns/ColumnDecimal.h>
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnConst.h>
-#include <Columns/ColumnArray.h>
-#include <Columns/ColumnFixedString.h>
-#include <Columns/ColumnTuple.h>
-#include <Columns/ColumnNullable.h>
-#include <Columns/ColumnVariant.h>
-#include <Columns/MaskOperations.h>
-#include <Common/typeid_cast.h>
-#include <Common/assert_cast.h>
-#include <Functions/IFunction.h>
-#include <Functions/FunctionHelpers.h>
-#include <Functions/GatherUtils/Algorithms.h>
-#include <Functions/FunctionIfBase.h>
-#include <Interpreters/castColumn.h>
-#include <Interpreters/Context.h>
-
 #include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/FunctionIfBase.h>
+#include <Functions/GatherUtils/Algorithms.h>
+#include <Functions/IFunction.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/castColumn.h>
+#include <Common/assert_cast.h>
+#include <Common/typeid_cast.h>
+
 #include <type_traits>

 namespace DB
@ -36,6 +38,7 @@ namespace ErrorCodes
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NOT_IMPLEMENTED;
+    extern const int LOGICAL_ERROR;
 }

 namespace
@ -227,13 +230,8 @@ inline void fillConstantConstant(const ArrayCond & cond, A a, B b, ArrayResult &
 {
    size_t size = cond.size();

-    /// Int8(alias type of uint8_t) has special aliasing properties that prevents compiler from auto-vectorizing for below codes, refer to https://gist.github.com/alexei-zaripov/dcc14c78819c5f1354afe8b70932007c
-    ///
-    /// for (size_t i = 0; i < size; ++i)
-    ///     res[i] = cond[i] ? static_cast<Int8>(a) : static_cast<Int8>(b);
-    ///
-    /// Therefore, we manually optimize it by avoiding branch miss when ResultType is Int8. Other types like (U)Int128|256 or Decimal128/256 also benefit from this optimization.
-    if constexpr (std::is_same_v<ResultType, Int8> || is_over_big_int<ResultType>)
+    /// We manually optimize the loop for types like (U)Int128|256 or Decimal128/256 to avoid branches
+    if constexpr (is_over_big_int<ResultType>)
    {
        alignas(64) const ResultType ab[2] = {static_cast<ResultType>(a), static_cast<ResultType>(b)};
        for (size_t i = 0; i < size; ++i)
@ -724,7 +722,6 @@ private:
                conditional(ConstSource<GenericArraySource>(*col_arr_then_const), ConstSource<GenericArraySource>(*col_arr_else_const), GenericArraySink(col_res->getData(), col_res->getOffsets(), rows), cond_data);
            else
                return nullptr;
-
            return res;
        }

@ -776,6 +773,90 @@ private:
        return ColumnTuple::create(tuple_columns);
    }

+    /// Implements `if` for Map arguments: each Map is split into an Array of keys and an Array of values,
+    /// `executeImpl` is run separately on the keys and on the values (with the same condition argument),
+    /// and the two resulting arrays are zipped back into a single Map column.
+    ///
+    /// Returns nullptr if the second or third argument is neither a ColumnMap nor a constant ColumnMap,
+    /// so the caller can fall back to another implementation.
+    /// Throws LOGICAL_ERROR if an intermediate result is not an array column or the key/value offsets differ.
+    ColumnPtr executeMap(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
+    {
+        /// Builds two array columns (keys, values) that share the offsets of the map's nested array.
+        auto extract_kv_from_map = [](const ColumnMap * map)
+        {
+            const ColumnTuple & tuple = map->getNestedData();
+            const auto & keys = tuple.getColumnPtr(0);
+            const auto & values = tuple.getColumnPtr(1);
+            const auto & offsets = map->getNestedColumn().getOffsetsPtr();
+            return std::make_pair(ColumnArray::create(keys, offsets), ColumnArray::create(values, offsets));
+        };
+
+        /// Extract keys and values from both arguments
+        Columns key_cols(2);
+        Columns value_cols(2);
+        for (size_t i = 0; i < 2; ++i)
+        {
+            const auto & arg = arguments[i + 1];
+            if (const ColumnMap * map = checkAndGetColumn<ColumnMap>(arg.column.get()))
+            {
+                auto [key_col, value_col] = extract_kv_from_map(map);
+                key_cols[i] = std::move(key_col);
+                value_cols[i] = std::move(value_col);
+            }
+            else if (const ColumnConst * const_map = checkAndGetColumnConst<ColumnMap>(arg.column.get()))
+            {
+                const ColumnMap * map_data = assert_cast<const ColumnMap *>(&const_map->getDataColumn());
+                auto [key_col, value_col] = extract_kv_from_map(map_data);
+
+                /// Keep constness: wrap the extracted arrays into constant columns of the same size.
+                size_t size = const_map->size();
+                key_cols[i] = ColumnConst::create(std::move(key_col), size);
+                value_cols[i] = ColumnConst::create(std::move(value_col), size);
+            }
+            else
+                return nullptr;
+        }
+
+        /// Compose temporary columns for keys and values
+        ColumnsWithTypeAndName key_columns(3);
+        key_columns[0] = arguments[0];
+        ColumnsWithTypeAndName value_columns(3);
+        value_columns[0] = arguments[0];
+        for (size_t i = 0; i < 2; ++i)
+        {
+            const auto & arg = arguments[i + 1];
+            const DataTypeMap & type = static_cast<const DataTypeMap &>(*arg.type);
+            const auto & key_type = type.getKeyType();
+            const auto & value_type = type.getValueType();
+            key_columns[i + 1] = {key_cols[i], std::make_shared<DataTypeArray>(key_type), {}};
+            value_columns[i + 1] = {value_cols[i], std::make_shared<DataTypeArray>(value_type), {}};
+        }
+
+        /// Evaluate the function separately for the keys and for the values of the maps
+        const DataTypeMap & map_result_type = static_cast<const DataTypeMap &>(*result_type);
+        auto key_result_type = std::make_shared<DataTypeArray>(map_result_type.getKeyType());
+        auto value_result_type = std::make_shared<DataTypeArray>(map_result_type.getValueType());
+        ColumnPtr key_result = executeImpl(key_columns, key_result_type, input_rows_count);
+        ColumnPtr value_result = executeImpl(value_columns, value_result_type, input_rows_count);
+
+        /// key_result and value_result are expected to be non-constant array columns here.
+        /// A checked cast is used on purpose: assert_cast never yields nullptr, so the checks below would be dead code with it.
+        const auto * key_array = checkAndGetColumn<ColumnArray>(key_result.get());
+        const auto * value_array = checkAndGetColumn<ColumnArray>(value_result.get());
+        if (!key_array)
+            throw Exception(
+                ErrorCodes::LOGICAL_ERROR,
+                "Key result column should be {} instead of {} in executeMap of function {}",
+                key_result_type->getName(),
+                key_result->getName(),
+                getName());
+        if (!value_array)
+            throw Exception(
+                ErrorCodes::LOGICAL_ERROR,
+                "Value result column should be {} instead of {} in executeMap of function {}",
+                value_result_type->getName(),
+                value_result->getName(),
+                getName());
+        if (!key_array->hasEqualOffsets(*value_array))
+            throw Exception(
+                ErrorCodes::LOGICAL_ERROR, "Key array and value array must have equal sizes in executeMap of function {}", getName());
+
+        /// Zip keys and values back into Array(Tuple(key, value)), which is the nested representation of a Map.
+        auto nested_column = ColumnArray::create(
+            ColumnTuple::create(Columns{key_array->getDataPtr(), value_array->getDataPtr()}), key_array->getOffsetsPtr());
+        return ColumnMap::create(std::move(nested_column));
+    }
+
    static ColumnPtr executeGeneric(
        const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count, bool use_variant_when_no_common_type)
    {
@ -1289,7 +1370,8 @@ public:
            || (res = executeTyped<UUID, UUID>(cond_col, arguments, result_type, input_rows_count))
            || (res = executeString(cond_col, arguments, result_type))
            || (res = executeGenericArray(cond_col, arguments, result_type))
-            || (res = executeTuple(arguments, result_type, input_rows_count))))
+            || (res = executeTuple(arguments, result_type, input_rows_count))
+            || (res = executeMap(arguments, result_type, input_rows_count))))
        {
            return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type);
        }
--- a/src/Functions/isNotNull.cpp
+++ b/src/Functions/isNotNull.cpp
@ -1,14 +1,14 @@
-#include <Functions/IFunction.h>
-#include <Functions/FunctionHelpers.h>
-#include <Functions/FunctionFactory.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <Core/ColumnNumbers.h>
-#include <Columns/ColumnNullable.h>
 #include <Columns/ColumnLowCardinality.h>
+#include <Columns/ColumnNullable.h>
 #include <Columns/ColumnVariant.h>
+#include <Core/ColumnNumbers.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunction.h>
+#include <Functions/PerformanceAdaptors.h>
 #include <Common/assert_cast.h>

-
 namespace DB
 {
 namespace
@ -21,10 +21,7 @@ class FunctionIsNotNull : public IFunction
 public:
    static constexpr auto name = "isNotNull";

-    static FunctionPtr create(ContextPtr)
-    {
-        return std::make_shared<FunctionIsNotNull>();
-    }
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIsNotNull>(); }

    std::string getName() const override
    {
@ -52,9 +49,9 @@ public:
            const auto & discriminators = checkAndGetColumn<ColumnVariant>(*elem.column)->getLocalDiscriminators();
            auto res = DataTypeUInt8().createColumn();
            auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
-            data.reserve(discriminators.size());
-            for (auto discr : discriminators)
-                data.push_back(discr != ColumnVariant::NULL_DISCRIMINATOR);
+            data.resize(discriminators.size());
+            for (size_t i = 0; i < discriminators.size(); ++i)
+                data[i] = discriminators[i] != ColumnVariant::NULL_DISCRIMINATOR;
            return res;
        }

@ -64,9 +61,9 @@ public:
            const size_t null_index = low_cardinality_column->getDictionary().getNullValueIndex();
            auto res = DataTypeUInt8().createColumn();
            auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
-            data.reserve(low_cardinality_column->size());
+            data.resize(low_cardinality_column->size());
            for (size_t i = 0; i != low_cardinality_column->size(); ++i)
-                data.push_back(low_cardinality_column->getIndexAt(i) != null_index);
+                data[i] = (low_cardinality_column->getIndexAt(i) != null_index);
            return res;
        }

@ -76,10 +73,7 @@ public:
            auto res_column = ColumnUInt8::create(input_rows_count);
            const auto & src_data = nullable->getNullMapData();
            auto & res_data = assert_cast<ColumnUInt8 &>(*res_column).getData();
-
-            for (size_t i = 0; i < input_rows_count; ++i)
-                res_data[i] = !src_data[i];
-
+            vector(src_data, res_data);
            return res_column;
        }
        else
@ -88,8 +82,34 @@ public:
            return DataTypeUInt8().createColumnConst(elem.column->size(), 1u);
        }
    }
-};

+private:
+    /// Writes the logical negation of every byte of `null_map` into `res` (1 where the value is not NULL).
+    /// `res` is indexed up to null_map.size() without being resized here, so the caller must size it beforehand.
+    /// Besides `vectorImpl`, the dispatcher `vector` also calls `vectorImplAVX2` / `vectorImplSSE42`
+    /// when USE_MULTITARGET_CODE is enabled; presumably this macro generates those variants - TODO confirm.
+    MULTITARGET_FUNCTION_AVX2_SSE42(
+    MULTITARGET_FUNCTION_HEADER(static void NO_INLINE), vectorImpl, MULTITARGET_FUNCTION_BODY((const PaddedPODArray<UInt8> & null_map, PaddedPODArray<UInt8> & res) /// NOLINT
+    {
+        size_t size = null_map.size();
+        for (size_t i = 0; i < size; ++i)
+            res[i] = !null_map[i];
+    }))
+
+    /// Runtime dispatcher for the null-map negation: prefers the AVX2 variant, then the SSE4.2 one,
+    /// and falls back to the generic implementation when neither is available or multitarget code is disabled.
+    static void NO_INLINE vector(const PaddedPODArray<UInt8> & null_map, PaddedPODArray<UInt8> & res)
+    {
+#if USE_MULTITARGET_CODE
+        if (isArchSupported(TargetArch::AVX2))
+            return vectorImplAVX2(null_map, res);
+
+        if (isArchSupported(TargetArch::SSE42))
+            return vectorImplSSE42(null_map, res);
+#endif
+        /// Generic (non-specialized) implementation.
+        vectorImpl(null_map, res);
+    }
+};
 }

 REGISTER_FUNCTION(IsNotNull)
--- a/src/Functions/seriesOutliersDetectTukey.cpp
+++ b/src/Functions/seriesOutliersDetectTukey.cpp
@ -0,0 +1,262 @@
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnsNumber.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunction.h>
+#include <Common/NaNUtils.h>
+#include <cmath>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+extern const int ILLEGAL_COLUMN;
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+/// Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences)
+///
+/// Accepts either a single array argument, or an array plus three constant numbers: `min_percentile`, `max_percentile` and `k`.
+/// For every input array it returns an Array(Float64) of the same length with a per-element score:
+/// zero inside the fences, negative below the lower fence and positive above the upper fence.
+class FunctionSeriesOutliersDetectTukey : public IFunction
+{
+public:
+    static constexpr auto name = "seriesOutliersDetectTukey";
+
+    /// Allowed bounds (in percent) for the user-supplied min/max percentiles.
+    static constexpr Float64 min_quartile = 2.0;
+    static constexpr Float64 max_quartile = 98.0;
+
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSeriesOutliersDetectTukey>(); }
+
+    std::string getName() const override { return name; }
+
+    bool isVariadic() const override { return true; }
+
+    size_t getNumberOfArguments() const override { return 0; }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+
+    /// Validates the argument count (1 or 4) and types; the result is always Array(Float64).
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        if (arguments.size() != 1 && arguments.size() != 4)
+            throw Exception(
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Function {} needs either 1 or 4 arguments; passed {}.",
+                getName(),
+                arguments.size());
+
+        FunctionArgumentDescriptors mandatory_args{{"time_series", &isArray<IDataType>, nullptr, "Array"}};
+        FunctionArgumentDescriptors optional_args{
+            {"min_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
+            {"max_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
+            {"k", &isNativeNumber<IDataType>, isColumnConst, "Number"}};
+
+        validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
+
+        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());
+    }
+
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
+
+    /// Reads the optional parameters, validates them and dispatches on the numeric type of the array elements.
+    /// Throws BAD_ARGUMENTS for out-of-range parameters and ILLEGAL_COLUMN for unsupported first-argument columns.
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        ColumnPtr col = arguments[0].column;
+        const ColumnArray * col_arr = checkAndGetColumn<ColumnArray>(col.get());
+
+        /// checkAndGetColumn returns nullptr on a type mismatch; do not dereference it blindly.
+        if (!col_arr)
+            throw Exception(
+                ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal column {} of first argument of function {}",
+                arguments[0].column->getName(),
+                getName());
+
+        const IColumn & arr_data = col_arr->getData();
+        const ColumnArray::Offsets & arr_offsets = col_arr->getOffsets();
+
+        if (input_rows_count == 0)
+            return ColumnArray::create(ColumnFloat64::create());
+
+        ColumnPtr col_res;
+
+        Float64 min_percentile = 0.25; /// default 25th percentile
+        Float64 max_percentile = 0.75; /// default 75th percentile
+        Float64 k = 1.50;
+
+        if (arguments.size() > 1)
+        {
+            Float64 p_min = arguments[1].column->getFloat64(0);
+            if (isnan(p_min) || !isFinite(p_min) || p_min < min_quartile || p_min > max_quartile)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} must be in range [2.0, 98.0]", getName());
+
+            min_percentile = p_min / 100;
+
+            /// Compare against p_min itself rather than min_percentile * 100: the division/multiplication round trip
+            /// is not exact in floating point (e.g. 7 / 100 * 100 > 7) and would spuriously reject equal percentiles.
+            Float64 p_max = arguments[2].column->getFloat64(0);
+            if (isnan(p_max) || !isFinite(p_max) || p_max < min_quartile || p_max > max_quartile || p_max < p_min)
+                throw Exception(
+                    ErrorCodes::BAD_ARGUMENTS,
+                    "The third argument of function {} must be in range [2.0, 98.0] and not less than the second argument",
+                    getName());
+
+            max_percentile = p_max / 100;
+
+            /// Zero is accepted, so the requirement is "non-negative", not "positive".
+            auto k_val = arguments[3].column->getFloat64(0);
+            if (k_val < 0.0 || isnan(k_val) || !isFinite(k_val))
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be a non-negative number", getName());
+
+            k = k_val;
+        }
+
+        if (executeNumber<UInt8>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<UInt16>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<UInt32>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<UInt64>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<Int8>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<Int16>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<Int32>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<Int64>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<Float32>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
+            || executeNumber<Float64>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res))
+        {
+            return col_res;
+        }
+        else
+            throw Exception(
+                ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal column {} of first argument of function {}",
+                arguments[0].column->getName(),
+                getName());
+    }
+
+private:
+    /// Computes outlier scores for arrays whose elements are stored as ColumnVector<T>.
+    ///
+    /// Returns false (leaving `res_ptr` untouched) if `arr_data` is not a ColumnVector<T>; otherwise fills `res_ptr`
+    /// with an Array(Float64) column and returns true.
+    /// `min_percentile` and `max_percentile` are fractions (percent / 100).
+    /// Throws BAD_ARGUMENTS if any array has fewer than four elements.
+    template <typename T>
+    bool executeNumber(
+        const IColumn & arr_data,
+        const ColumnArray::Offsets & arr_offsets,
+        Float64 min_percentile,
+        Float64 max_percentile,
+        Float64 k,
+        ColumnPtr & res_ptr) const
+    {
+        const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&arr_data);
+        if (!src_data_concrete)
+            return false;
+
+        const PaddedPODArray<T> & src_vec = src_data_concrete->getData();
+
+        auto outliers = ColumnFloat64::create();
+        auto & outlier_data = outliers->getData();
+
+        ColumnArray::ColumnOffsets::MutablePtr res_offsets = ColumnArray::ColumnOffsets::create();
+        auto & res_offsets_data = res_offsets->getData();
+
+        /// Scratch buffer reused between rows to avoid reallocations.
+        std::vector<Float64> src_sorted;
+
+        /// Orders NaN after every number. The default operator< is not a strict weak ordering
+        /// in the presence of NaN, which makes std::sort undefined behaviour on such input.
+        auto less_nan_last = [](Float64 lhs, Float64 rhs)
+        {
+            if (std::isnan(rhs))
+                return !std::isnan(lhs);
+            return lhs < rhs;
+        };
+
+        ColumnArray::Offset prev_src_offset = 0;
+        for (auto src_offset : arr_offsets)
+        {
+            chassert(prev_src_offset <= src_offset);
+            size_t len = src_offset - prev_src_offset;
+            if (len < 4)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName());
+
+            src_sorted.assign(src_vec.begin() + prev_src_offset, src_vec.begin() + src_offset);
+            std::sort(src_sorted.begin(), src_sorted.end(), less_nan_last);
+
+            Float64 q1, q2;
+
+            /// If the rank is a whole number, average the two neighbouring elements; otherwise take the element at ceil(rank).
+            Float64 p1 = len * min_percentile;
+            if (p1 == static_cast<Int64>(p1))
+            {
+                size_t index = static_cast<size_t>(p1) - 1;
+                q1 = (src_sorted[index] + src_sorted[index + 1]) / 2;
+            }
+            else
+            {
+                size_t index = static_cast<size_t>(std::ceil(p1)) - 1;
+                q1 = src_sorted[index];
+            }
+
+            Float64 p2 = len * max_percentile;
+            if (p2 == static_cast<Int64>(p2))
+            {
+                size_t index = static_cast<size_t>(p2) - 1;
+                q2 = (src_sorted[index] + src_sorted[index + 1]) / 2;
+            }
+            else
+            {
+                size_t index = static_cast<size_t>(std::ceil(p2)) - 1;
+                q2 = src_sorted[index];
+            }
+
+            Float64 iqr = q2 - q1; /// interquantile range
+
+            Float64 lower_fence = q1 - k * iqr;
+            Float64 upper_fence = q2 + k * iqr;
+
+            /// Score is the signed distance to the violated fence, and zero for values inside [lower_fence, upper_fence].
+            for (ColumnArray::Offset j = prev_src_offset; j < src_offset; ++j)
+            {
+                auto score = std::min((src_vec[j] - lower_fence), 0.0) + std::max((src_vec[j] - upper_fence), 0.0);
+                outlier_data.push_back(score);
+            }
+            res_offsets_data.push_back(outlier_data.size());
+            prev_src_offset = src_offset;
+        }
+
+        res_ptr = ColumnArray::create(std::move(outliers), std::move(res_offsets));
+        return true;
+    }
+};
+
+/// Registers seriesOutliersDetectTukey in the function factory together with its embedded
+/// documentation (description text with syntax, arguments, examples, and the function category).
+REGISTER_FUNCTION(SeriesOutliersDetectTukey)
+{
+    factory.registerFunction<FunctionSeriesOutliersDetectTukey>(FunctionDocumentation{
+        .description = R"(
+Detects outliers in series data using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
+
+**Syntax**
+
+``` sql
+seriesOutliersDetectTukey(series);
+seriesOutliersDetectTukey(series, min_percentile, max_percentile, k);
+```
+
+**Arguments**
+
+- `series` - An array of numeric values.
+- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25.
+- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75.
+- `k` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5
+
+At least four data points are required in `series` to detect outliers.
+
+**Returned value**
+
+- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly.
+
+Type: [Array](../../sql-reference/data-types/array.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0;
+```
+
+Result:
+
+``` text
+┌───────────print_0─────────────────┐
+│[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0] │
+└───────────────────────────────────┘
+```
+
+Query:
+
+``` sql
+SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0;
+```
+
+Result:
+
+``` text
+┌─print_0──────────────────────────────┐
+│ [0,0,0,0,0,0,0,0,0,19.5,0,0,0,0,0,0] │
+└──────────────────────────────────────┘
+```)",
+        .categories{"Time series analysis"}});
+}
+}
--- a/src/IO/SharedThreadPools.cpp
+++ b/src/IO/SharedThreadPools.cpp
@ -20,6 +20,9 @@ namespace CurrentMetrics
    extern const Metric MergeTreeOutdatedPartsLoaderThreads;
    extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive;
    extern const Metric MergeTreeOutdatedPartsLoaderThreadsScheduled;
+    extern const Metric DatabaseReplicatedCreateTablesThreads;
+    extern const Metric DatabaseReplicatedCreateTablesThreadsActive;
+    extern const Metric DatabaseReplicatedCreateTablesThreadsScheduled;
 }

 namespace DB
@ -148,4 +151,10 @@ StaticThreadPool & getOutdatedPartsLoadingThreadPool()
    return instance;
 }

+/// Accessor for the single shared "CreateTablesThreadPool" instance.
+/// The pool is a function-local static, so it is constructed on the first call and reused afterwards.
+StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool()
+{
+    static StaticThreadPool instance(
+        "CreateTablesThreadPool",
+        CurrentMetrics::DatabaseReplicatedCreateTablesThreads,
+        CurrentMetrics::DatabaseReplicatedCreateTablesThreadsActive,
+        CurrentMetrics::DatabaseReplicatedCreateTablesThreadsScheduled);
+    return instance;
+}
+}
+
 }
--- a/src/IO/SharedThreadPools.h
+++ b/src/IO/SharedThreadPools.h
@ -64,4 +64,7 @@ StaticThreadPool & getPartsCleaningThreadPool();
 /// the number of threads by calling enableTurboMode() :-)
 StaticThreadPool & getOutdatedPartsLoadingThreadPool();

+/// ThreadPool used for creating tables in DatabaseReplicated.
+StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool();
+
 }
--- a/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.cpp
+++ b/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.cpp
@ -100,8 +100,11 @@ void RewriteSumFunctionWithSumAndCountMatcher::visit(const ASTFunction & functio
        if (!new_ast)
            return;
        else
+        {
+            new_ast->setAlias(ast->tryGetAlias());
            ast = new_ast;
        }
+    }
    else if (column_id == 1)
    {
        const auto new_ast = makeASTFunction(func_plus_minus->name,
@ -116,8 +119,11 @@ void RewriteSumFunctionWithSumAndCountMatcher::visit(const ASTFunction & functio
        if (!new_ast)
            return;
        else
+        {
+            new_ast->setAlias(ast->tryGetAlias());
            ast = new_ast;
        }
    }
+}

 }
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@ -211,16 +211,17 @@ std::shared_ptr<TSystemLog> createSystemLog(
            if (!settings.empty())
                log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings;
        }
-
-        /// Add comment to AST. So it will be saved when the table will be renamed.
-        log_settings.engine += fmt::format(" COMMENT {} ", quoteString(comment));
    }

    /// Validate engine definition syntax to prevent some configuration errors.
    ParserStorageWithComment storage_parser;
-
-    parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(),
+    auto storage_ast = parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(),
            "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+    auto & storage_with_comment = storage_ast->as<StorageWithComment &>();
+
+    /// Add comment to AST. So it will be saved when the table will be renamed.
+    if (!storage_with_comment.comment || storage_with_comment.comment->as<ASTLiteral &>().value.safeGet<String>().empty())
+        log_settings.engine += fmt::format(" COMMENT {} ", quoteString(comment));

    log_settings.queue_settings.flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds",
                                                                               TSystemLog::getDefaultFlushIntervalMilliseconds());
--- a/src/Parsers/ASTAlterQuery.cpp
+++ b/src/Parsers/ASTAlterQuery.cpp
@ -104,6 +104,16 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState &
        {
            settings.ostr << (settings.hilite ? hilite_keyword : "") << " REMOVE " << remove_property;
        }
+        else if (settings_changes)
+        {
+            settings.ostr << (settings.hilite ? hilite_keyword : "") << " MODIFY SETTING " << (settings.hilite ? hilite_none : "");
+            settings_changes->formatImpl(settings, state, frame);
+        }
+        else if (settings_resets)
+        {
+            settings.ostr << (settings.hilite ? hilite_keyword : "") << " RESET SETTING " << (settings.hilite ? hilite_none : "");
+            settings_resets->formatImpl(settings, state, frame);
+        }
        else
        {
            if (first)
--- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
+++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
@ -130,6 +130,11 @@ namespace DB
                reinterpret_cast<const uint8_t *>(internal_data.data() + start),
                end - start,
                reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr));
+        else if constexpr (std::is_same_v<NumericType, Int8>)
+            status = builder.AppendValues(
+                reinterpret_cast<const int8_t *>(internal_data.data() + start),
+                end - start,
+                reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr));
        else
            status = builder.AppendValues(internal_data.data() + start, end - start, reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr));
        checkStatus(status, write_column->getName(), format_name);
--- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
+++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
@ -603,6 +603,8 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
            memcpy(buf, istr.position(), bytes_to_copy);
            buf[bytes_to_copy] = 0;

+            const bool hex_like = bytes_to_copy >= 2 && buf[0] == '0' && (buf[1] == 'x' || buf[1] == 'X');
+
            char * pos_double = buf;
            errno = 0;
            Float64 float_value = std::strtod(buf, &pos_double);
@ -614,13 +616,13 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(

            char * pos_integer = buf;
            errno = 0;
-            UInt64 uint_value = std::strtoull(buf, &pos_integer, 0);
+            UInt64 uint_value = std::strtoull(buf, &pos_integer, hex_like ? 16 : 10);
            if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63)))
            {
                istr.position() += pos_integer - buf;
                if (negative && type_info.main_type == Type::Int64)
                    number = static_cast<Int64>(-uint_value);
-                else if (!negative && type_info.main_type == Type::UInt64)
+                else if (type_info.main_type == Type::UInt64 && (!negative || uint_value == 0))
                    number = uint_value;
                else
                    return false;
--- a/src/Processors/Transforms/buildPushingToViewsChain.cpp
+++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp
@ -247,30 +247,6 @@ Chain buildPushingToViewsChain(
        {
            insert_context->setSetting("insert_deduplicate", Field{false});
        }
-        else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views &&
-            !insert_settings.insert_deduplication_token.value.empty())
-        {
-            /** Update deduplication token passed to dependent MV with current table id. So it is possible to properly handle
-              * deduplication in complex INSERT flows.
-              *
-              * Example:
-              *
-              * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1
-              *          |                                     |
-              *          └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘
-              *
-              * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will
-              * be inserted into `ds_2_1`.
-              */
-            auto insert_deduplication_token = insert_settings.insert_deduplication_token.value;
-
-            if (table_id.hasUUID())
-                insert_deduplication_token += "_" + toString(table_id.uuid);
-            else
-                insert_deduplication_token += "_" + table_id.getFullNameNotQuoted();
-
-            insert_context->setSetting("insert_deduplication_token", insert_deduplication_token);
-        }

        // Processing of blocks for MVs is done block by block, and there will
        // be no parallel reading after (plus it is not a costless operation)
@ -327,6 +303,46 @@ Chain buildPushingToViewsChain(
        auto & target_name = runtime_stats->target_name;
        auto * view_counter_ms = &runtime_stats->elapsed_ms;

+        const auto & insert_settings = insert_context->getSettingsRef();
+        ContextMutablePtr view_insert_context = insert_context;
+
+        if (!disable_deduplication_for_children &&
+            insert_settings.update_insert_deduplication_token_in_dependent_materialized_views &&
+            !insert_settings.insert_deduplication_token.value.empty())
+        {
+            /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle
+              * deduplication in complex INSERT flows.
+              *
+              * Example:
+              *
+              * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1
+              *          |                                     |
+              *          └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘
+              *
+              * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will
+              * be inserted into `ds_2_1`.
+              *
+              * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables
+              * are involved.
+              *
+              * Example:
+              *
+              * landing -┬--> mv_1_1 --┬-> ds_1_1
+              *          |             |
+              *          └--> mv_1_2 --┘
+              *
+              */
+            auto insert_deduplication_token = insert_settings.insert_deduplication_token.value;
+
+            if (view_id.hasUUID())
+                insert_deduplication_token += "_" + toString(view_id.uuid);
+            else
+                insert_deduplication_token += "_" + view_id.getFullNameNotQuoted();
+
+            view_insert_context = Context::createCopy(insert_context);
+            view_insert_context->setSetting("insert_deduplication_token", insert_deduplication_token);
+        }
+
        if (auto * materialized_view = dynamic_cast<StorageMaterializedView *>(view.get()))
        {
            auto lock = materialized_view->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
@ -394,7 +410,7 @@ Chain buildPushingToViewsChain(
                    insert_columns.emplace_back(column.name);
            }

-            InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false);
+            InterpreterInsertQuery interpreter(nullptr, view_insert_context, false, false, false);
            out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms);
            out.addStorageHolder(view);
            out.addStorageHolder(inner_table);
@ -404,7 +420,7 @@ Chain buildPushingToViewsChain(
            runtime_stats->type = QueryViewsLogElement::ViewType::LIVE;
            query = live_view->getInnerQuery(); // Used only to log in system.query_views_log
            out = buildPushingToViewsChain(
-                view, view_metadata_snapshot, insert_context, ASTPtr(),
+                view, view_metadata_snapshot, view_insert_context, ASTPtr(),
                /* no_destination= */ true,
                thread_status_holder, running_group, view_counter_ms, async_insert, storage_header);
        }
@ -413,13 +429,13 @@ Chain buildPushingToViewsChain(
            runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW;
            query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log
            out = buildPushingToViewsChain(
-                view, view_metadata_snapshot, insert_context, ASTPtr(),
+                view, view_metadata_snapshot, view_insert_context, ASTPtr(),
                /* no_destination= */ true,
                thread_status_holder, running_group, view_counter_ms, async_insert);
        }
        else
            out = buildPushingToViewsChain(
-                view, view_metadata_snapshot, insert_context, ASTPtr(),
+                view, view_metadata_snapshot, view_insert_context, ASTPtr(),
                /* no_destination= */ false,
                thread_status_holder, running_group, view_counter_ms, async_insert);

--- a/src/Server/GRPCServer.cpp
+++ b/src/Server/GRPCServer.cpp
@ -419,7 +419,11 @@ namespace
        void read(GRPCQueryInfo & query_info_, const CompletionCallback & callback) override
        {
            if (!query_info.has_value())
+            {
                callback(false);
+                return;
+            }
+
            query_info_ = std::move(query_info).value();
            query_info.reset();
            callback(true);
@ -486,7 +490,11 @@ namespace
        void read(GRPCQueryInfo & query_info_, const CompletionCallback & callback) override
        {
            if (!query_info.has_value())
+            {
                callback(false);
+                return;
+            }
+
            query_info_ = std::move(query_info).value();
            query_info.reset();
            callback(true);
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp
@ -1,6 +1,14 @@
 #include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h>
 #include <Storages/MergeTree/MergeTreeIndexInverted.h>
+#include <Common/ElapsedTimeProfileEventIncrement.h>
 #include <Common/MemoryTrackerBlockerInThread.h>
+#include <Common/logger_useful.h>
+
+namespace ProfileEvents
+{
+extern const Event MergeTreeDataWriterSkipIndicesCalculationMicroseconds;
+extern const Event MergeTreeDataWriterStatisticsCalculationMicroseconds;
+}

 namespace DB
 {
@ -148,6 +156,8 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk(
    , default_codec(default_codec_)
    , compute_granularity(index_granularity.empty())
    , compress_primary_key(settings.compress_primary_key)
+    , execution_stats(skip_indices.size(), stats.size())
+    , log(getLogger(storage.getLogName() + " (DataPartWriter)"))
 {
    if (settings.blocks_are_granules_size && !index_granularity.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -329,9 +339,12 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc

 void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block)
 {
-    for (const auto & stat_ptr : stats)
+    for (size_t i = 0; i < stats.size(); ++i)
    {
+        const auto & stat_ptr = stats[i];
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterStatisticsCalculationMicroseconds);
        stat_ptr->update(block.getByName(stat_ptr->columnName()).column);
+        execution_stats.statistics_build_us[i] += watch.elapsed();
    }
 }

@ -378,10 +391,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
                    writeBinaryLittleEndian(1UL, marks_out);
            }

+            ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterSkipIndicesCalculationMicroseconds);
+
            size_t pos = granule.start_row;
            skip_indices_aggregators[i]->update(skip_indexes_block, &pos, granule.rows_to_write);
            if (granule.is_complete)
                ++skip_index_accumulated_marks[i];
+
+            execution_stats.skip_indices_build_us[i] += watch.elapsed();
        }
    }
 }
@ -481,6 +498,9 @@ void MergeTreeDataPartWriterOnDisk::finishStatisticsSerialization(bool sync)
        if (sync)
            stream->sync();
    }
+
+    for (size_t i = 0; i < stats.size(); ++i)
+        LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part->name);
 }

 void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums)
@ -504,6 +524,10 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync)
    }
    for (auto & store: gin_index_stores)
        store.second->finalize();
+
+    for (size_t i = 0; i < skip_indices.size(); ++i)
+        LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part->name);
+
    gin_index_stores.clear();
    skip_indices_streams.clear();
    skip_indices_aggregators.clear();
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h
@ -190,6 +190,20 @@ private:
    void initStatistics();

    virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0;
+
+    struct ExecutionStatistics
+    {
+        ExecutionStatistics(size_t skip_indices_cnt, size_t stats_cnt)
+            : skip_indices_build_us(skip_indices_cnt, 0), statistics_build_us(stats_cnt, 0)
+        {
+        }
+
+        std::vector<size_t> skip_indices_build_us; // [i] corresponds to the i-th index
+        std::vector<size_t> statistics_build_us; // [i] corresponds to the i-th stat
+    };
+    ExecutionStatistics execution_stats;
+
+    LoggerPtr log;
 };

 }
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@ -1,21 +1,22 @@
-#include <Storages/MergeTree/MergeTreeDataWriter.h>
-#include <Storages/MergeTree/MergedBlockOutputStream.h>
-#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
 #include <Columns/ColumnConst.h>
-#include <Common/OpenTelemetryTraceContext.h>
-#include <Common/HashTable/HashMap.h>
-#include <Common/Exception.h>
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/ObjectUtils.h>
 #include <Disks/createVolume.h>
+#include <IO/HashingWriteBuffer.h>
+#include <IO/WriteHelpers.h>
 #include <Interpreters/AggregationCommon.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/MergeTreeTransaction.h>
-#include <IO/HashingWriteBuffer.h>
-#include <DataTypes/DataTypeDateTime.h>
-#include <DataTypes/DataTypeDate.h>
-#include <DataTypes/ObjectUtils.h>
-#include <IO/WriteHelpers.h>
-#include <Common/typeid_cast.h>
 #include <Processors/TTL/ITTLAlgorithm.h>
+#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
+#include <Storages/MergeTree/MergeTreeDataWriter.h>
+#include <Storages/MergeTree/MergedBlockOutputStream.h>
+#include <Common/ElapsedTimeProfileEventIncrement.h>
+#include <Common/Exception.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/OpenTelemetryTraceContext.h>
+#include <Common/typeid_cast.h>

 #include <Parsers/queryToString.h>

@ -35,11 +36,16 @@ namespace ProfileEvents
    extern const Event MergeTreeDataWriterRows;
    extern const Event MergeTreeDataWriterUncompressedBytes;
    extern const Event MergeTreeDataWriterCompressedBytes;
+    extern const Event MergeTreeDataWriterSortingBlocksMicroseconds;
+    extern const Event MergeTreeDataWriterMergingBlocksMicroseconds;
+    extern const Event MergeTreeDataWriterProjectionsCalculationMicroseconds;
    extern const Event MergeTreeDataProjectionWriterBlocks;
    extern const Event MergeTreeDataProjectionWriterBlocksAlreadySorted;
    extern const Event MergeTreeDataProjectionWriterRows;
    extern const Event MergeTreeDataProjectionWriterUncompressedBytes;
    extern const Event MergeTreeDataProjectionWriterCompressedBytes;
+    extern const Event MergeTreeDataProjectionWriterSortingBlocksMicroseconds;
+    extern const Event MergeTreeDataProjectionWriterMergingBlocksMicroseconds;
    extern const Event RejectedInserts;
 }

@ -472,6 +478,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
    IColumn::Permutation perm;
    if (!sort_description.empty())
    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterSortingBlocksMicroseconds);
+
        if (!isAlreadySorted(block, sort_description))
        {
            stableGetPermutation(block, sort_description, perm);
@ -483,7 +491,10 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(

    Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names;
    if (context->getSettingsRef().optimize_on_insert)
+    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterMergingBlocksMicroseconds);
        block = mergeBlock(block, sort_description, partition_key_columns, perm_ptr, data.merging_params);
+    }

    /// Size of part would not be greater than block.bytes() + epsilon
    size_t expected_size = block.bytes();
@ -588,7 +599,13 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(

    for (const auto & projection : metadata_snapshot->getProjections())
    {
-        auto projection_block = projection.calculate(block, context);
+        Block projection_block;
+        {
+            ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterProjectionsCalculationMicroseconds);
+            projection_block = projection.calculate(block, context);
+            LOG_DEBUG(log, "Spent {} ms calculating projection {} for the part {}", watch.elapsed() / 1000, projection.name, new_data_part->name);
+        }
+
        if (projection_block.rows())
        {
            auto proj_temp_part = writeProjectionPart(data, log, projection_block, projection, new_data_part.get());
@ -685,6 +702,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
    IColumn::Permutation perm;
    if (!sort_description.empty())
    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataProjectionWriterSortingBlocksMicroseconds);
+
        if (!isAlreadySorted(block, sort_description))
        {
            stableGetPermutation(block, sort_description, perm);
@ -696,6 +715,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(

    if (projection.type == ProjectionDescription::Type::Aggregate)
    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataProjectionWriterMergingBlocksMicroseconds);
+
        MergeTreeData::MergingParams projection_merging_params;
        projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating;
        block = mergeBlock(block, sort_description, {}, perm_ptr, projection_merging_params);
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@ -28,6 +28,11 @@
 #include <Common/ProfileEventsScope.h>


+namespace ProfileEvents
+{
+extern const Event MutateTaskProjectionsCalculationMicroseconds;
+}
+
 namespace CurrentMetrics
 {
    extern const Metric PartMutation;
@ -1242,7 +1247,13 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
        for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i)
        {
            const auto & projection = *ctx->projections_to_build[i];
-            auto projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context));
+
+            Block projection_block;
+            {
+                ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds);
+                projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context));
+            }
+
            if (projection_block)
            {
                auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart(
--- a/src/Storages/NATS/NATSConnection.cpp
+++ b/src/Storages/NATS/NATSConnection.cpp
@ -91,6 +91,8 @@ void NATSConnectionManager::connectImpl()
        natsOptions_SetUserInfo(options, configuration.username.c_str(), configuration.password.c_str());
    if (!configuration.token.empty())
        natsOptions_SetToken(options, configuration.token.c_str());
+    if (!configuration.credential_file.empty())
+        natsOptions_SetUserCredentialsFromFiles(options, configuration.credential_file.c_str(), nullptr);

    if (configuration.secure)
    {
--- a/src/Storages/NATS/NATSConnection.h
+++ b/src/Storages/NATS/NATSConnection.h
@ -14,6 +14,7 @@ struct NATSConfiguration
    String username;
    String password;
    String token;
+    String credential_file;

    int max_reconnect;
    int reconnect_wait;
--- a/src/Storages/NATS/NATSSettings.h
+++ b/src/Storages/NATS/NATSSettings.h
@ -25,6 +25,7 @@ class ASTStorage;
    M(String, nats_username, "", "NATS username", 0) \
    M(String, nats_password, "", "NATS password", 0) \
    M(String, nats_token, "", "NATS token", 0) \
+    M(String, nats_credential_file, "", "Path to a NATS credentials file", 0) \
    M(UInt64, nats_startup_connect_tries, 5, "Number of connect tries at startup", 0) \
    M(UInt64, nats_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \
    M(StreamingHandleErrorMode, nats_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for NATS engine. Possible values: default (throw an exception after nats_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \
--- a/src/Storages/NATS/StorageNATS.cpp
+++ b/src/Storages/NATS/StorageNATS.cpp
@ -67,6 +67,7 @@ StorageNATS::StorageNATS(
    auto nats_username = getContext()->getMacros()->expand(nats_settings->nats_username);
    auto nats_password = getContext()->getMacros()->expand(nats_settings->nats_password);
    auto nats_token = getContext()->getMacros()->expand(nats_settings->nats_token);
+    auto nats_credential_file = getContext()->getMacros()->expand(nats_settings->nats_credential_file);

    configuration =
    {
@ -75,6 +76,7 @@ StorageNATS::StorageNATS(
        .username = nats_username.empty() ? getContext()->getConfigRef().getString("nats.user", "") : nats_username,
        .password = nats_password.empty() ? getContext()->getConfigRef().getString("nats.password", "") : nats_password,
        .token = nats_token.empty() ? getContext()->getConfigRef().getString("nats.token", "") : nats_token,
+        .credential_file = nats_credential_file.empty() ? getContext()->getConfigRef().getString("nats.credential_file", "") : nats_credential_file,
        .max_reconnect = static_cast<int>(nats_settings->nats_max_reconnect.value),
        .reconnect_wait = static_cast<int>(nats_settings->nats_reconnect_wait.value),
        .secure = nats_settings->nats_secure.value
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
@ -313,7 +313,7 @@ void MaterializedPostgreSQLConsumer::readTupleData(
                Int32 col_len = readInt32(message, pos, size);
                String value;
                for (Int32 i = 0; i < col_len; ++i)
-                    value += readInt8(message, pos, size);
+                    value += static_cast<char>(readInt8(message, pos, size));

                insertValue(storage_data, value, column_idx);
                break;
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@ -1533,10 +1533,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
 IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result)
 {
    const auto & slot_to_shard = cluster->getSlotToShard();
-
    const IColumn * column = result.column.get();
-    if (const auto * col_const = typeid_cast<const ColumnConst *>(column))
-        column = &col_const->getDataColumn();

 // If result.type is DataTypeLowCardinality, do shard according to its dictionaryType
 #define CREATE_FOR_TYPE(TYPE)                                                                                       \
--- a/src/Storages/System/StorageSystemZooKeeper.cpp
+++ b/src/Storages/System/StorageSystemZooKeeper.cpp
@ -180,7 +180,7 @@ using Paths = std::deque<std::pair<String, ZkPathType>>;
 class ReadFromSystemZooKeeper final : public SourceStepWithFilter
 {
 public:
-    ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info_, ContextPtr context_);
+    ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info_, UInt64 max_block_size_, ContextPtr context_);

    String getName() const override { return "ReadFromSystemZooKeeper"; }

@ -189,13 +189,41 @@ public:
    void applyFilters() override;

 private:
-    void fillData(MutableColumns & res_columns);
-
    std::shared_ptr<const StorageLimitsList> storage_limits;
+    const UInt64 max_block_size;
    ContextPtr context;
    Paths paths;
 };

+
+class SystemZooKeeperSource : public ISource
+{
+public:
+    SystemZooKeeperSource(
+        Paths && paths_,
+        Block header_,
+        UInt64 max_block_size_,
+        ContextPtr context_)
+        : ISource(header_)
+        , max_block_size(max_block_size_)
+        , paths(std::move(paths_))
+        , context(std::move(context_))
+    {
+    }
+
+    String getName() const override { return "SystemZooKeeper"; }
+
+protected:
+    Chunk generate() override;
+
+private:
+    const UInt64 max_block_size;
+    Paths paths;
+    ContextPtr context;
+    bool started = false;
+};
+
+
 StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_)
        : IStorage(table_id_)
 {
@ -211,11 +239,11 @@ void StorageSystemZooKeeper::read(
    SelectQueryInfo & query_info,
    ContextPtr context,
    QueryProcessingStage::Enum /*processed_stage*/,
-    size_t /*max_block_size*/,
+    size_t max_block_size,
    size_t /*num_streams*/)
 {
    auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals());
-    auto read_step = std::make_unique<ReadFromSystemZooKeeper>(header, query_info, context);
+    auto read_step = std::make_unique<ReadFromSystemZooKeeper>(header, query_info, max_block_size, context);
    query_plan.addStep(std::move(read_step));
 }

@ -414,7 +442,7 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont
    for (const auto * node : filter_nodes)
        extractPathImpl(*node, res, context, allow_unrestricted);

-    if (filter_nodes.empty() && allow_unrestricted)
+    if (res.empty() && allow_unrestricted)
        res.emplace_back("/", ZkPathType::Recurse);

    return res;
@ -426,8 +454,26 @@ void ReadFromSystemZooKeeper::applyFilters()
    paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper);
 }

-void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
+
+Chunk SystemZooKeeperSource::generate()
 {
+    if (paths.empty())
+    {
+        if (!started)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "SELECT from system.zookeeper table must contain condition like path = 'path' "
+                        "or path IN ('path1','path2'...) or path IN (subquery) "
+                        "in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.");
+
+        /// No more work
+        return {};
+    }
+
+    started = true;
+
+    MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns();
+    size_t row_count = 0;
+
    QueryStatusPtr query_status = context->getProcessListElement();

    const auto & settings = context->getSettingsRef();
@ -453,12 +499,6 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
        return zookeeper;
    };

-    if (paths.empty())
-        throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                        "SELECT from system.zookeeper table must contain condition like path = 'path' "
-                        "or path IN ('path1','path2'...) or path IN (subquery) "
-                        "in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.");
-
    const Int64 max_inflight_requests = std::max<Int64>(1, context->getSettingsRef().max_download_threads.value);

    struct ListTask
@ -476,6 +516,16 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
        if (query_status)
            query_status->checkTimeLimit();

+        /// Check if the block is big enough already
+        if (max_block_size > 0 && row_count > 0)
+        {
+            size_t total_size = 0;
+            for (const auto & column : res_columns)
+                total_size += column->byteSize();
+            if (total_size > max_block_size)
+                break;
+        }
+
        list_tasks.clear();
        std::vector<String> paths_to_list;
        while (!paths.empty() && static_cast<Int64>(list_tasks.size()) < max_inflight_requests)
@ -519,8 +569,8 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
                continue;

            auto & task = list_tasks[list_task_idx];
-            if (auto elem = context->getProcessListElement())
-                elem->checkTimeLimit();
+            if (query_status)
+                query_status->checkTimeLimit();

            Strings nodes = std::move(list_result.names);

@ -557,8 +607,8 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)

            auto & get_task = get_tasks[i];
            auto & list_task = list_tasks[get_task.list_task_idx];
-            if (auto elem = context->getProcessListElement())
-                elem->checkTimeLimit();
+            if (query_status)
+                query_status->checkTimeLimit();

            // Deduplication
            String key = list_task.path_part + '/' + get_task.node;
@ -584,17 +634,22 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
            res_columns[col_num++]->insert(
                list_task.path); /// This is the original path. In order to process the request, condition in WHERE should be triggered.

+            ++row_count;
+
            if (list_task.path_type != ZkPathType::Exact && res.stat.numChildren > 0)
            {
                paths.emplace_back(key, ZkPathType::Recurse);
            }
        }
    }
+
+    return Chunk(std::move(res_columns), row_count);
 }

-ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, ContextPtr context_)
+ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, UInt64 max_block_size_, ContextPtr context_)
    : SourceStepWithFilter({.header = header})
    , storage_limits(query_info.storage_limits)
+    , max_block_size(max_block_size_)
    , context(std::move(context_))
 {
 }
@ -602,13 +657,7 @@ ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQue
 void ReadFromSystemZooKeeper::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
    const auto & header = getOutputStream().header;
-    MutableColumns res_columns = header.cloneEmptyColumns();
-    fillData(res_columns);
-
-    UInt64 num_rows = res_columns.at(0)->size();
-    Chunk chunk(std::move(res_columns), num_rows);
-
-    auto source = std::make_shared<SourceFromSingleChunk>(header, std::move(chunk));
+    auto source = std::make_shared<SystemZooKeeperSource>(std::move(paths), header, max_block_size, context);
    source->setStorageLimits(storage_limits);
    processors.emplace_back(source);
    pipeline.init(Pipe(std::move(source)));
--- a/src/Storages/System/attachSystemTablesImpl.h
+++ b/src/Storages/System/attachSystemTablesImpl.h
@ -7,14 +7,20 @@
 namespace DB
 {

-template<typename StorageT, typename... StorageArgs>
-void attach(ContextPtr context, IDatabase & system_database, const String & table_name, const String & comment, StorageArgs && ... args)
+template <int Length>
+using StringLiteral = const char(&)[Length];
+
+template<typename StorageT, int CommentSize, typename... StorageArgs>
+void attach(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral<CommentSize> comment, StorageArgs && ... args)
 {
+    static_assert(CommentSize > 15, "The comment for a system table is too short or empty");
    assert(system_database.getDatabaseName() == DatabaseCatalog::SYSTEM_DATABASE);
+
+    auto table_id = StorageID::createEmpty();
    if (system_database.getUUID() == UUIDHelpers::Nil)
    {
        /// Attach to Ordinary database.
-        auto table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name);
+        table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name);
        system_database.attachTable(context, table_name, std::make_shared<StorageT>(table_id, std::forward<StorageArgs>(args)...));
    }
    else
@ -22,10 +28,11 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl
        /// Attach to Atomic database.
        /// NOTE: UUIDs are not persistent, but it's ok since no data are stored on disk for these storages
        /// and path is actually not used
-        auto table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name, UUIDHelpers::generateV4());
+        table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name, UUIDHelpers::generateV4());
        DatabaseCatalog::instance().addUUIDMapping(table_id.uuid);
        String path = "store/" + DatabaseCatalog::getPathForUUID(table_id.uuid);
        system_database.attachTable(context, table_name, std::make_shared<StorageT>(table_id, std::forward<StorageArgs>(args)...), path);
+    }

    /// Set the comment
    auto table = DatabaseCatalog::instance().getTable(table_id, context);
@ -34,6 +41,5 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl
    metadata.comment = comment;
    table->setInMemoryMetadata(metadata);
 }
-}

 }
--- a/tests/analyzer_integration_broken_tests.txt
+++ b/tests/analyzer_integration_broken_tests.txt
@ -10,7 +10,6 @@ test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_ove
 test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task
 test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database
 test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster
-test_profile_events_s3/test.py::test_profile_events
 test_replicating_constants/test.py::test_different_versions
 test_select_access_rights/test_main.py::test_alias_columns
 test_select_access_rights/test_main.py::test_select_count
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@ -23,7 +23,6 @@
 02428_parameterized_view
 02493_inconsistent_hex_and_binary_number
 02575_merge_prewhere_different_default_kind
-00917_multiple_joins_denny_crane
 02725_agg_projection_resprect_PK
 02763_row_policy_storage_merge_alias
 02818_parameterized_view_with_cte_multiple_usage
--- a/tests/ci/artifacts_helper.py
+++ b/tests/ci/artifacts_helper.py
@ -10,14 +10,17 @@ from pathlib import Path
 from shutil import copy2
 from typing import List, Optional, Union

+# isort: off
 from github.Commit import Commit

+# isort: on
+
 from build_download_helper import download_build_with_progress
 from commit_status_helper import post_commit_status
 from compress_files import SUFFIX, compress_fast, decompress_fast
 from env_helper import CI, RUNNER_TEMP, S3_BUILDS_BUCKET
 from git_helper import SHA_REGEXP
-from report import HEAD_HTML_TEMPLATE, FOOTER_HTML_TEMPLATE
+from report import FOOTER_HTML_TEMPLATE, HEAD_HTML_TEMPLATE, SUCCESS
 from s3_helper import S3Helper

 ARTIFACTS_PATH = Path(RUNNER_TEMP) / "artifacts"
@ -128,9 +131,7 @@ class ArtifactsHelper:

    @staticmethod
    def post_commit_status(commit: Commit, url: str) -> None:
-        post_commit_status(
-            commit, "success", url, "Artifacts for workflow", "Artifacts"
-        )
+        post_commit_status(commit, SUCCESS, url, "Artifacts for workflow", "Artifacts")

    def _regenerate_index(self) -> None:
        if CI:
--- a/tests/ci/ast_fuzzer_check.py
+++ b/tests/ci/ast_fuzzer_check.py
@ -7,13 +7,11 @@ import sys
 from pathlib import Path

 from build_download_helper import get_build_name_for_check, read_build_urls
-from clickhouse_helper import (
-    CiLogsCredentials,
-)
+from clickhouse_helper import CiLogsCredentials
 from docker_images_helper import DockerImage, get_docker_image, pull_image
 from env_helper import REPORT_PATH, TEMP_PATH
 from pr_info import PRInfo
-from report import JobReport
+from report import FAIL, FAILURE, OK, SUCCESS, JobReport, TestResult
 from stopwatch import Stopwatch
 from tee_popen import TeePopen

@ -113,7 +111,6 @@ def main():
    paths = {
        "run.log": run_log_path,
        "main.log": main_log_path,
-        "fuzzer.log": workspace_path / "fuzzer.log",
        "report.html": workspace_path / "report.html",
        "core.zst": workspace_path / "core.zst",
        "dmesg.log": workspace_path / "dmesg.log",
@ -122,13 +119,21 @@ def main():
    compressed_server_log_path = workspace_path / "server.log.zst"
    if compressed_server_log_path.exists():
        paths["server.log.zst"] = compressed_server_log_path
-
+    else:
        # The script can fail before the invocation of `zstd`, but we are still interested in its log:
-
        not_compressed_server_log_path = workspace_path / "server.log"
        if not_compressed_server_log_path.exists():
            paths["server.log"] = not_compressed_server_log_path

+    # Same idea but with the fuzzer log
+    compressed_fuzzer_log_path = workspace_path / "fuzzer.log.zst"
+    if compressed_fuzzer_log_path.exists():
+        paths["fuzzer.log.zst"] = compressed_fuzzer_log_path
+    else:
+        not_compressed_fuzzer_log_path = workspace_path / "fuzzer.log"
+        if not_compressed_fuzzer_log_path.exists():
+            paths["fuzzer.log"] = not_compressed_fuzzer_log_path
+
    # Try to get status message saved by the fuzzer
    try:
        with open(workspace_path / "status.txt", "r", encoding="utf-8") as status_f:
@ -137,12 +142,16 @@ def main():
        with open(workspace_path / "description.txt", "r", encoding="utf-8") as desc_f:
            description = desc_f.readline().rstrip("\n")
    except:
-        status = "failure"
+        status = FAILURE
        description = "Task failed: $?=" + str(retcode)

+    test_result = TestResult(description, OK)
+    if "fail" in status:
+        test_result.status = FAIL
+
    JobReport(
        description=description,
-        test_results=[],
+        test_results=[test_result],
        status=status,
        start_time=stopwatch.start_time_str,
        duration=stopwatch.duration_seconds,
@ -151,7 +160,7 @@ def main():
    ).dump()

    logging.info("Result: '%s', '%s'", status, description)
-    if status == "failure":
+    if status != SUCCESS:
        sys.exit(1)


--- a/Show More
+++ b/Show More