Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix_join_to_subqueries_null_pointer

2024-11-27 01:51:59 +00:00 · 2022-11-04 10:05:12 +08:00 · 2022-11-04 10:05:12 +08:00 · 9d8840b8a7
commit 9d8840b8a7
parent 1a8568473c 4012fcbf01
382 changed files with 7265 additions and 5083 deletions
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@ -6,7 +6,7 @@ env:

 on: # yamllint disable-line rule:truthy
  workflow_run:
-    workflows: ["PullRequestCI", "ReleaseCI", "DocsCheck", "BackportPR"]
+    workflows: ["PullRequestCI", "ReleaseBranchCI", "DocsCheck", "BackportPR"]
    types:
      - requested
 jobs:
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@ -122,3 +122,58 @@ jobs:
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  SonarCloud:
+    runs-on: [self-hosted, builder]
+    env:
+      SONAR_SCANNER_VERSION: 4.7.0.2747
+      SONAR_SERVER_URL: "https://sonarcloud.io"
+      BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
+      CC: clang-15
+      CXX: clang++-15
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0  # Shallow clones should be disabled for a better relevancy of analysis
+          submodules: true
+      - name: Set up JDK 11
+        uses: actions/setup-java@v1
+        with:
+          java-version: 11
+      - name: Download and set up sonar-scanner
+        env:
+          SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
+        run: |
+          mkdir -p "$HOME/.sonar"
+          curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}"
+          unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/"
+          echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH"
+      - name: Download and set up build-wrapper
+        env:
+          BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
+        run: |
+          curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}"
+          unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/"
+          echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH"
+      - name: Set Up Build Tools
+        run: |
+          sudo apt-get update
+          sudo apt-get install -yq git cmake ccache python3 ninja-build
+          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
+      - name: Run build-wrapper
+        run: |
+          mkdir build
+          cd build
+          cmake ..
+          cd ..
+          build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
+      - name: Run sonar-scanner
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+        run: |
+          sonar-scanner \
+            --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
+            --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
+            --define sonar.projectKey="ClickHouse_ClickHouse" \
+            --define sonar.organization="clickhouse-java" \
+            --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@ -2023,6 +2023,7 @@ jobs:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  TestsBugfixCheck:
+    needs: [CheckLabels, StyleCheck]
    runs-on: [self-hosted, stress-tester]
    steps:
      - name: Set envs
--- a/.snyk
+++ b/.snyk
@ -0,0 +1,4 @@
+# Snyk (https://snyk.io) policy file
+exclude:
+  global:
+    - tests/**
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -31,6 +31,8 @@
 * Add OpenTelemetry support to ON CLUSTER DDL (require `distributed_ddl_entry_format_version` to be set to 4). [#41484](https://github.com/ClickHouse/ClickHouse/pull/41484) ([Frank Chen](https://github.com/FrankChen021)).
 * Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode (with `wait_for_async_insert=0`)) for better introspection. [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040) ([Anton Popov](https://github.com/CurtizJ)).
 * Add support for methods `lz4`, `bz2`, `snappy` in HTTP's `Accept-Encoding` which is a non-standard extension to HTTP protocol. [#42071](https://github.com/ClickHouse/ClickHouse/pull/42071) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Adds Morton Coding (ZCurve) encode/decode functions. [#41753](https://github.com/ClickHouse/ClickHouse/pull/41753) ([Constantine Peresypkin](https://github.com/pkit)).
+* Add support for `SET setting_name = DEFAULT`. [#42187](https://github.com/ClickHouse/ClickHouse/pull/42187) ([Filatenkov Artur](https://github.com/FArthur-cmd)).

 #### Experimental Feature
 * Added new infrastructure for query analysis and planning under the `allow_experimental_analyzer` setting. [#31796](https://github.com/ClickHouse/ClickHouse/pull/31796) ([Maksim Kita](https://github.com/kitaisreal)).
@ -66,8 +68,7 @@
 * Allow readable size values (like `1TB`) in cache config. [#41688](https://github.com/ClickHouse/ClickHouse/pull/41688) ([Kseniia Sumarokova](https://github.com/kssenii)).
 * ClickHouse could cache stale DNS entries for some period of time (15 seconds by default) until the cache won't be updated asynchronously. During these periods ClickHouse can nevertheless try to establish a connection and produce errors. This behavior is fixed. [#41707](https://github.com/ClickHouse/ClickHouse/pull/41707) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
 * Add interactive history search with fzf-like utility (fzf/sk) for `clickhouse-client`/`clickhouse-local` (note you can use `FZF_DEFAULT_OPTS`/`SKIM_DEFAULT_OPTIONS` to additionally configure the behavior). [#41730](https://github.com/ClickHouse/ClickHouse/pull/41730) ([Azat Khuzhin](https://github.com/azat)).
-* 
-Only allow clients connecting to a secure server with an invalid certificate only to proceed with the '--accept-certificate' flag. [#41743](https://github.com/ClickHouse/ClickHouse/pull/41743) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Allow clients connecting to a secure server with an invalid certificate to proceed only with the '--accept-certificate' flag. [#41743](https://github.com/ClickHouse/ClickHouse/pull/41743) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
 * Add function `tryBase58Decode`, similar to the existing function `tryBase64Decode`. [#41824](https://github.com/ClickHouse/ClickHouse/pull/41824) ([Robert Schulze](https://github.com/rschu1ze)).
 * Improve feedback when replacing partition with different primary key. Fixes [#34798](https://github.com/ClickHouse/ClickHouse/issues/34798). [#41838](https://github.com/ClickHouse/ClickHouse/pull/41838) ([Salvatore](https://github.com/tbsal)).
 * Fix parallel parsing: segmentator now checks `max_block_size`. This fixed memory overallocation in case of parallel parsing and small LIMIT. [#41852](https://github.com/ClickHouse/ClickHouse/pull/41852) ([Vitaly Baranov](https://github.com/vitlibar)).
@ -86,6 +87,8 @@ Only allow clients connecting to a secure server with an invalid certificate onl
 * Fix rarely invalid cast of aggregate state types with complex types such as Decimal. This fixes [#42408](https://github.com/ClickHouse/ClickHouse/issues/42408). [#42417](https://github.com/ClickHouse/ClickHouse/pull/42417) ([Amos Bird](https://github.com/amosbird)).
 * Allow to use `Date32` arguments for `dateName` function. [#42554](https://github.com/ClickHouse/ClickHouse/pull/42554) ([Roman Vasin](https://github.com/rvasin)).
 * Now filters with NULL literals will be used during index analysis. [#34063](https://github.com/ClickHouse/ClickHouse/issues/34063). [#41842](https://github.com/ClickHouse/ClickHouse/pull/41842) ([Amos Bird](https://github.com/amosbird)).
+* Merge parts if every part in the range is older than a certain threshold. The threshold can be set by using `min_age_to_force_merge_seconds`. This closes [#35836](https://github.com/ClickHouse/ClickHouse/issues/35836). [#42423](https://github.com/ClickHouse/ClickHouse/pull/42423) ([Antonio Andelic](https://github.com/antonio2368)). This is a continuation of [#39550](https://github.com/ClickHouse/ClickHouse/pull/39550) by [@fastio](https://github.com/fastio) who implemented most of the logic.
+* Improve the time to recover lost keeper connections. [#42541](https://github.com/ClickHouse/ClickHouse/pull/42541) ([Raúl Marín](https://github.com/Algunenano)).

 #### Build/Testing/Packaging Improvement
 * Add fuzzer for table definitions [#40096](https://github.com/ClickHouse/ClickHouse/pull/40096) ([Anton Popov](https://github.com/CurtizJ)). This represents the biggest advancement for ClickHouse testing in this year so far.
--- a/README.md
+++ b/README.md
@ -5,7 +5,7 @@ ClickHouse® is an open-source column-oriented database management system that a
 ## Useful Links

 * [Official website](https://clickhouse.com/) has a quick high-level overview of ClickHouse on the main page.
-* [ClickHouse Cloud](https://clickhouse.com/cloud) ClickHouse as a service, built by the creators and maintainers.
+* [ClickHouse Cloud](https://clickhouse.cloud) ClickHouse as a service, built by the creators and maintainers.
 * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
 * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
 * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
@ -16,5 +16,6 @@ ClickHouse® is an open-source column-oriented database management system that a
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.

 ## Upcoming events
-* [**v22.10 Release Webinar**](https://clickhouse.com/company/events/v22-10-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
-* [**Introducing ClickHouse Cloud**](https://clickhouse.com/company/events/cloud-beta) Introducing ClickHouse as a service, built by creators and maintainers of the fastest OLAP database on earth. Join Tanya Bragin for a detailed walkthrough of ClickHouse Cloud capabilities, as well as a peek behind the curtain to understand the unique architecture that makes our service tick. 
+* [**v22.11 Release Webinar**](https://clickhouse.com/company/events/v22-11-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
+* [**ClickHouse Meetup at the Deutsche Bank office in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/289311596/) Hear from Deutsche Bank on why they chose ClickHouse for big sensitive data in a regulated environment. The ClickHouse team will then present how ClickHouse is used for real time financial data analytics, including tick data, trade analytics and risk management.
+* [**AWS re:Invent**](https://clickhouse.com/company/events/aws-reinvent) Core members of the ClickHouse team -- including 2 of our founders -- will be at re:Invent from November 29 to December 3. We are available on the show floor, but are also determining interest in holding an event during the time there. 
--- a/cmake/clang_tidy.cmake
+++ b/cmake/clang_tidy.cmake
@ -3,10 +3,20 @@ option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)

 if (ENABLE_CLANG_TIDY)

-    find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
+    find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache")
+    if (CLANG_TIDY_CACHE_PATH)
+        find_program (_CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
+
+        # Why do we use ';' here?
+        # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY
+        # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax.
+        set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper")
+    else ()
+        find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
+    endif ()

    if (CLANG_TIDY_PATH)
-        message(STATUS
+        message (STATUS
            "Using clang-tidy: ${CLANG_TIDY_PATH}.
            The checks will be run during build process.
            See the .clang-tidy file at the root directory to configure the checks.")
@ -15,11 +25,15 @@ if (ENABLE_CLANG_TIDY)

        # clang-tidy requires assertions to guide the analysis
        # Note that NDEBUG is set implicitly by CMake for non-debug builds
-        set(COMPILER_FLAGS "${COMPILER_FLAGS} -UNDEBUG")
+        set (COMPILER_FLAGS "${COMPILER_FLAGS} -UNDEBUG")

-        # The variable CMAKE_CXX_CLANG_TIDY will be set inside src and base directories with non third-party code.
+        # The variable CMAKE_CXX_CLANG_TIDY will be set inside the following directories with non third-party code.
+        # - base
+        # - programs
+        # - src
+        # - utils
        # set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
    else ()
-        message(${RECONFIGURE_MESSAGE_LEVEL} "clang-tidy is not found")
+        message (${RECONFIGURE_MESSAGE_LEVEL} "clang-tidy is not found")
    endif ()
 endif ()
--- a/contrib/cctz
+++ b/contrib/cctz
@ -1 +1 @@
-Subproject commit 7a454c25c7d16053bcd327cdd16329212a08fa4a
+Subproject commit 5c8528fb35e89ee0b3a7157490423fba0d4dd7b5
--- a/contrib/libcxx
+++ b/contrib/libcxx
@ -1 +1 @@
-Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239
+Subproject commit 4db7f838afd3139eb3761694b04d31275df45d2d
--- a/contrib/libcxx-cmake/CMakeLists.txt
+++ b/contrib/libcxx-cmake/CMakeLists.txt
@ -25,6 +25,7 @@ set(SRCS
 "${LIBCXX_SOURCE_DIR}/src/ios.cpp"
 "${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp"
 "${LIBCXX_SOURCE_DIR}/src/iostream.cpp"
+"${LIBCXX_SOURCE_DIR}/src/legacy_debug_handler.cpp"
 "${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp"
 "${LIBCXX_SOURCE_DIR}/src/locale.cpp"
 "${LIBCXX_SOURCE_DIR}/src/memory.cpp"
@ -49,6 +50,7 @@ set(SRCS
 "${LIBCXX_SOURCE_DIR}/src/valarray.cpp"
 "${LIBCXX_SOURCE_DIR}/src/variant.cpp"
 "${LIBCXX_SOURCE_DIR}/src/vector.cpp"
+"${LIBCXX_SOURCE_DIR}/src/verbose_abort.cpp"
 )

 add_library(cxx ${SRCS})
--- a/contrib/libcxxabi
+++ b/contrib/libcxxabi
@ -1 +1 @@
-Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7
+Subproject commit a736a6b3c6a7b8aae2ebad629ca21b2c55b4820e
--- a/contrib/libcxxabi-cmake/CMakeLists.txt
+++ b/contrib/libcxxabi-cmake/CMakeLists.txt
@ -9,6 +9,7 @@ set(SRCS
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
+# "${LIBCXXABI_SOURCE_DIR}/src/cxa_noexception.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp"
--- a/contrib/llvm-project-cmake/CMakeLists.txt
+++ b/contrib/llvm-project-cmake/CMakeLists.txt
@ -21,6 +21,9 @@ set (LLVM_INCLUDE_DIRS
    "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm/include"
 )
 set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
+# NOTE: You should not remove this line since otherwise it will use default 20,
+# and llvm cannot be compiled with bundled libcxx and 20 standard.
+set (CMAKE_CXX_STANDARD 14)

 # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles.
 set (REQUIRED_LLVM_LIBRARIES
--- a/contrib/rocksdb
+++ b/contrib/rocksdb
@ -1 +1 @@
-Subproject commit e7c2b2f7bcf3b4b33892a1a6d25c32a93edfbdb9
+Subproject commit 2c8998e26c6d46b27c710d7829c3a15e34959f70
--- a/contrib/zlib-ng
+++ b/contrib/zlib-ng
@ -1 +1 @@
-Subproject commit bffad6f6fe74d6a2f92e2668390664a926c68733
+Subproject commit 50f0eae1a411764cd6d1e85b3ce471438acd3c1c
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -91,6 +91,9 @@ ENV PATH="$PATH:/usr/local/go/bin"
 ENV GOPATH=/workdir/go
 ENV GOCACHE=/workdir/

+RUN curl https://raw.githubusercontent.com/matus-chochlik/ctcache/7fd516e91c17779cbc6fc18bd119313d9532dd90/clang-tidy-cache -Lo /usr/bin/clang-tidy-cache \
+  && chmod +x /usr/bin/clang-tidy-cache
+
 RUN mkdir /workdir && chmod 777 /workdir
 WORKDIR /workdir

--- a/docker/packager/packager
+++ b/docker/packager/packager
@ -258,6 +258,10 @@ def parse_env_variables(
        if clang_tidy:
            # 15G is not enough for tidy build
            cache_maxsize = "25G"
+
+            # `CTCACHE_DIR` has the same purpose as the `CCACHE_DIR` above.
+            # It's there to have the clang-tidy cache embedded into our standard `CCACHE_DIR`
+            result.append("CTCACHE_DIR=/ccache/clang-tidy-cache")
        result.append(f"CCACHE_MAXSIZE={cache_maxsize}")

    if distcc_hosts:
@ -282,9 +286,7 @@ def parse_env_variables(
        cmake_flags.append("-DENABLE_TESTS=1")

    if shared_libraries:
-        cmake_flags.append(
-            "-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1"
-        )
+        cmake_flags.append("-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1")
        # We can't always build utils because it requires too much space, but
        # we have to build them at least in some way in CI. The shared library
        # build is probably the least heavy disk-wise.
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="22.10.1.1877"
+ARG VERSION="22.10.2.11"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 # user/group precreated explicitly with fixed uid/gid on purpose.
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="22.10.1.1877"
+ARG VERSION="22.10.2.11"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 # set non-empty deb_location_url url to create a docker image
--- a/docker/test/integration/base/Dockerfile
+++ b/docker/test/integration/base/Dockerfile
@ -27,9 +27,14 @@ RUN apt-get update \
        tar \
        tzdata \
        unixodbc \
+        python3-pip \
+        libcurl4-openssl-dev \
+        libssl-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

+RUN pip3 install pycurl
+
 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH

--- a/docs/README.md
+++ b/docs/README.md
@ -212,4 +212,4 @@ Templates:

 ## How to Build Documentation

-You can build your documentation manually by following the instructions in [docs/tools/README.md](../docs/tools/README.md). Also, our CI runs the documentation build after the `documentation` label is added to PR. You can see the results of a build in the GitHub interface. If you have no permissions to add labels, a reviewer of your PR will add it.
+You can build your documentation manually by following the instructions in the docs repo [contrib-writing-guide](https://github.com/ClickHouse/clickhouse-docs/blob/main/contrib-writing-guide.md). Also, our CI runs the documentation build after the `documentation` label is added to PR. You can see the results of a build in the GitHub interface. If you have no permissions to add labels, a reviewer of your PR will add it.
--- a/docs/changelogs/v22.10.2.11-stable.md
+++ b/docs/changelogs/v22.10.2.11-stable.md
@ -0,0 +1,18 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.10.2.11-stable (d2bfcaba002) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189)
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#42750](https://github.com/ClickHouse/ClickHouse/issues/42750): A segmentation fault related to DNS & c-ares has been reported. The below error occurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} <Fatal> BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} <Fatal> BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} <Fatal> BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} <Fatal> BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} <Fatal> BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} <Fatal> BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} <Fatal> BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)).
+* Backported in [#42793](https://github.com/ClickHouse/ClickHouse/issues/42793): Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
--- a/docs/changelogs/v22.3.14.18-lts.md
+++ b/docs/changelogs/v22.3.14.18-lts.md
@ -0,0 +1,26 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.3.14.18-lts (642946f61b2) FIXME as compared to v22.3.13.80-lts (e2708b01fba)
+
+#### Bug Fix
+* Backported in [#42432](https://github.com/ClickHouse/ClickHouse/issues/42432): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#42328](https://github.com/ClickHouse/ClickHouse/issues/42328): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
+* Backported in [#42358](https://github.com/ClickHouse/ClickHouse/issues/42358): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#42298](https://github.com/ClickHouse/ClickHouse/issues/42298): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42592](https://github.com/ClickHouse/ClickHouse/issues/42592): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
--- a/docs/changelogs/v22.3.14.23-lts.md
+++ b/docs/changelogs/v22.3.14.23-lts.md
@ -0,0 +1,29 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.3.14.23-lts (74956bfee4d) FIXME as compared to v22.3.13.80-lts (e2708b01fba)
+
+#### Improvement
+* Backported in [#42527](https://github.com/ClickHouse/ClickHouse/issues/42527): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168). [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Bug Fix
+* Backported in [#42432](https://github.com/ClickHouse/ClickHouse/issues/42432): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#42328](https://github.com/ClickHouse/ClickHouse/issues/42328): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
+* Backported in [#42358](https://github.com/ClickHouse/ClickHouse/issues/42358): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#42298](https://github.com/ClickHouse/ClickHouse/issues/42298): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42592](https://github.com/ClickHouse/ClickHouse/issues/42592): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
--- a/docs/changelogs/v22.7.7.24-stable.md
+++ b/docs/changelogs/v22.7.7.24-stable.md
@ -0,0 +1,29 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.7.7.24-stable (02ad1f979a8) FIXME as compared to v22.7.6.74-stable (c00ffb3c11a)
+
+#### Bug Fix
+* Backported in [#42433](https://github.com/ClickHouse/ClickHouse/issues/42433): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#42329](https://github.com/ClickHouse/ClickHouse/issues/42329): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
+* Backported in [#42359](https://github.com/ClickHouse/ClickHouse/issues/42359): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#42268](https://github.com/ClickHouse/ClickHouse/issues/42268): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)).
+* Backported in [#42299](https://github.com/ClickHouse/ClickHouse/issues/42299): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42386](https://github.com/ClickHouse/ClickHouse/issues/42386): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#42498](https://github.com/ClickHouse/ClickHouse/issues/42498): Setting `additional_table_filters` was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#42593](https://github.com/ClickHouse/ClickHouse/issues/42593): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
--- a/docs/changelogs/v22.8.7.34-lts.md
+++ b/docs/changelogs/v22.8.7.34-lts.md
@ -0,0 +1,37 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.8.7.34-lts (3c38e5e8ab9) FIXME as compared to v22.8.6.71-lts (7bf38a43e30)
+
+#### Improvement
+* Backported in [#42096](https://github.com/ClickHouse/ClickHouse/issues/42096): Replace back `clickhouse su` command with `sudo -u` in start in order to respect limits in `/etc/security/limits.conf`. [#41847](https://github.com/ClickHouse/ClickHouse/pull/41847) ([Eugene Konkov](https://github.com/ekonkov)).
+
+#### Bug Fix
+* Backported in [#42434](https://github.com/ClickHouse/ClickHouse/issues/42434): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#42296](https://github.com/ClickHouse/ClickHouse/issues/42296): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
+* Backported in [#42360](https://github.com/ClickHouse/ClickHouse/issues/42360): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#42489](https://github.com/ClickHouse/ClickHouse/issues/42489): Removed skipping of mutations in unaffected partitions of `MergeTree` tables, because this feature never worked correctly and might cause resurrection of finished mutations. [#40589](https://github.com/ClickHouse/ClickHouse/pull/40589) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#42121](https://github.com/ClickHouse/ClickHouse/issues/42121): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Prevent crash when passing wrong aggregation states to groupBitmap*. [#41972](https://github.com/ClickHouse/ClickHouse/pull/41972) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix read bytes/rows in X-ClickHouse-Summary with materialized views. [#41973](https://github.com/ClickHouse/ClickHouse/pull/41973) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#42269](https://github.com/ClickHouse/ClickHouse/issues/42269): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)).
+* Backported in [#42300](https://github.com/ClickHouse/ClickHouse/issues/42300): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42387](https://github.com/ClickHouse/ClickHouse/issues/42387): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#42499](https://github.com/ClickHouse/ClickHouse/issues/42499): Setting `additional_table_filters` was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#42571](https://github.com/ClickHouse/ClickHouse/issues/42571): Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42594](https://github.com/ClickHouse/ClickHouse/issues/42594): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
--- a/docs/changelogs/v22.8.8.3-lts.md
+++ b/docs/changelogs/v22.8.8.3-lts.md
@ -0,0 +1,13 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.8.8.3-lts (ac5a6cababc) FIXME as compared to v22.8.7.34-lts (3c38e5e8ab9)
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#42677](https://github.com/ClickHouse/ClickHouse/issues/42677): keeper-fix: fix race in accessing logs while snapshot is being installed. [#40627](https://github.com/ClickHouse/ClickHouse/pull/40627) ([Antonio Andelic](https://github.com/antonio2368)).
+
--- a/docs/changelogs/v22.9.4.32-stable.md
+++ b/docs/changelogs/v22.9.4.32-stable.md
@ -0,0 +1,33 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.9.4.32-stable (3db8bcf1a70) FIXME as compared to v22.9.3.18-stable (0cb4b15d2fa)
+
+#### Bug Fix
+* Backported in [#42435](https://github.com/ClickHouse/ClickHouse/issues/42435): Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#42297](https://github.com/ClickHouse/ClickHouse/issues/42297): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
+* Backported in [#42361](https://github.com/ClickHouse/ClickHouse/issues/42361): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#42122](https://github.com/ClickHouse/ClickHouse/issues/42122): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#41938](https://github.com/ClickHouse/ClickHouse/issues/41938): Don't allow to create or alter merge tree tables with virtual column name _row_exists, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)).
+* Backported in [#42179](https://github.com/ClickHouse/ClickHouse/issues/42179): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)).
+* Backported in [#42301](https://github.com/ClickHouse/ClickHouse/issues/42301): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42388](https://github.com/ClickHouse/ClickHouse/issues/42388): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#42500](https://github.com/ClickHouse/ClickHouse/issues/42500): Setting `additional_table_filters` was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#42581](https://github.com/ClickHouse/ClickHouse/issues/42581): This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42572](https://github.com/ClickHouse/ClickHouse/issues/42572): Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#42595](https://github.com/ClickHouse/ClickHouse/issues/42595): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@ -105,7 +105,7 @@ ninja
 Example for Fedora Rawhide:
 ``` bash
 sudo yum update
-yum --nogpg install git cmake make clang-c++ python3
+sudo yum --nogpg install git cmake make clang python3 ccache
 git clone --recursive https://github.com/ClickHouse/ClickHouse.git
 mkdir build && cd build
 cmake ../ClickHouse
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@ -139,7 +139,7 @@ The following settings can be specified in configuration file for given endpoint
 -   `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`.
 -   `region` — Specifies S3 region name. Optional.
 -   `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
-   `header` —  Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times.
+-   `header` —  Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
 -   `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
 -   `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional.

--- a/docs/en/getting-started/example-datasets/nyc-taxi.md
+++ b/docs/en/getting-started/example-datasets/nyc-taxi.md
@ -33,7 +33,7 @@ CREATE TABLE trips (
    tip_amount          Float32,
    tolls_amount        Float32,
    total_amount        Float32,
-    payment_type        Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4),
+    payment_type        Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4, 'UNK' = 5),
    pickup_ntaname      LowCardinality(String),
    dropoff_ntaname     LowCardinality(String)
 )
@ -63,7 +63,7 @@ SELECT
    payment_type,
    pickup_ntaname,
    dropoff_ntaname
-FROM url(
+FROM s3(
    'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz',
    'TabSeparatedWithNames'
 )
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@ -128,6 +128,24 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.

 </details>

+<details>
+<summary>Migration Method for installing the deb-packages</summary>
+
+```bash
+sudo apt-key del E0C56BD4
+sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
+echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
+    /etc/apt/sources.list.d/clickhouse.list
+sudo apt-get update
+
+sudo apt-get install -y clickhouse-server clickhouse-client
+
+sudo service clickhouse-server start
+clickhouse-client # or "clickhouse-client --password" if you set up a password.
+```
+
+</details>
+
 You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.

 You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/).
--- a/docs/en/operations/_backup.md
+++ b/docs/en/operations/_backup.md
@ -1,9 +1,12 @@
---
-slug: /en/operations/backup
-sidebar_position: 49
-sidebar_label: Data backup and restore
-title: Data backup and restore
---
+
+[//]: # (This file is included in Manage > Backups)
+
+- [Backup to a local disk](#backup-to-a-local-disk)
+- [Configuring backup/restore to use an S3 endpoint](#configuring-backuprestore-to-use-an-s3-endpoint)
+- [Backup/restore using an S3 disk](#backuprestore-using-an-s3-disk)
+- [Alternatives](#alternatives)
+
+## Background

 While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.

@ -15,7 +18,9 @@ Each company has different resources available and business requirements, so the
 Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
 :::

-## Configure a backup destination
+## Backup to a local disk
+
+### Configure a backup destination

 In the examples below you will see the backup destination specified like `Disk('backups', '1.zip')`.  To prepare the destination add a file to `/etc/clickhouse-server/config.d/backup_disk.xml` specifying the backup destination.  For example, this file defines disk named `backups` and then adds that disk to the **backups > allowed_disk** list:

@ -39,7 +44,7 @@ In the examples below you will see the backup destination specified like `Disk('
 </clickhouse>
 ```

-## Parameters
+### Parameters

 Backups can be either full or incremental, and can include tables (including materialized views, projections, and dictionaries), and databases.  Backups can be synchronous (default) or asynchronous.  They can be compressed.  Backups can be password protected.

@ -52,7 +57,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
    - `password` for the file on disk
    - `base_backup`: the destination of the previous backup of this source.  For example, `Disk('backups', '1.zip')` 

-## Usage examples
+### Usage examples

 Backup and then restore a table:
 ```
@ -81,7 +86,7 @@ RESTORE TABLE test.table AS test.table2 FROM Disk('backups', '1.zip')
 BACKUP TABLE test.table3 AS test.table4 TO Disk('backups', '2.zip')
 ```

-## Incremental backups
+### Incremental backups

 Incremental backups can be taken by specifying the `base_backup`.
 :::note
@ -100,7 +105,7 @@ RESTORE TABLE test.table AS test.table2
  FROM Disk('backups', 'incremental-a.zip');
 ```

-## Assign a password to the backup
+### Assign a password to the backup

 Backups written to disk can have a password applied to the file:
 ```
@ -116,7 +121,7 @@ RESTORE TABLE test.table
  SETTINGS password='qwerty'
 ```

-## Compression settings
+### Compression settings

 If you would like to specify the compression method or level:
 ```
@ -125,14 +130,14 @@ BACKUP TABLE test.table
  SETTINGS compression_method='lzma', compression_level=3
 ```

-## Restore specific partitions
+### Restore specific partitions
 If specific partitions associated with a table need to be restored, these can be specified.  To restore partitions 2 and 3 from backup:
 ```
 RESTORE TABLE test.table PARTITIONS '2', '3'
  FROM Disk('backups', 'filename.zip')
 ```

-## Check the status of backups
+### Check the status of backups

 The backup command returns an `id` and `status`, and that `id` can be used to get the status of the backup.  This is very useful to check the progress of long ASYNC backups.  The example below shows a failure that happened when trying to overwrite an existing backup file:
 ```sql
@ -171,13 +176,118 @@ end_time:          2022-08-30 09:21:46
 1 row in set. Elapsed: 0.002 sec.
 ```

-## Backup to S3
+## Configuring BACKUP/RESTORE to use an S3 Endpoint

-It is possible to `BACKUP`/`RESTORE` to S3, but this disk should be configured
-in a proper way, since by default you will need to backup metadata from local
-disk to make backup full.
+To write backups to an S3 bucket you need three pieces of information:
+- S3 endpoint,
+  for example `https://mars-doc-test.s3.amazonaws.com/backup-S3/`
+- Access key ID,
+  for example `ABC123`
+- Secret access key,
+  for example `Abc+123`

-First of all, you need to configure S3 disk in a special way:
+:::note
+Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/configuring-s3-for-clickhouse-use.md). Come back to this doc after saving the policy; there is no need to configure ClickHouse to use the S3 bucket.
+:::
+
+The destination for a backup will be specified like this:
+```
+S3('<S3 endpoint>/<directory>', '<Access key ID>', '<Secret access key>')
+```
+
+```sql
+CREATE TABLE data
+(
+    `key` Int,
+    `value` String,
+    `array` Array(String)
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+```
+
+```sql
+INSERT INTO data SELECT *
+FROM generateRandom('key Int, value String, array Array(String)')
+LIMIT 1000
+```
+
+### Create a base (initial) backup
+
+Incremental backups require a _base_ backup to start from; this example will be used
+later as the base backup.  The first parameter of the S3 destination is the S3 endpoint followed by the directory within the bucket to use for this backup.  In this example the directory is named `my_backup`.
+
+```sql
+BACKUP TABLE data TO S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_backup', 'ABC123', 'Abc+123')
+```
+
+```response
+┌─id───────────────────────────────────┬─status─────────┐
+│ de442b75-a66c-4a3c-a193-f76f278c70f3 │ BACKUP_CREATED │
+└──────────────────────────────────────┴────────────────┘
+```
+
+### Add more data
+
+Incremental backups are populated with the difference between the base backup and the current content of the table being backed up.  Add more data before taking the incremental backup:
+
+```sql
+INSERT INTO data SELECT *
+FROM generateRandom('key Int, value String, array Array(String)')
+LIMIT 100
+```
+### Take an incremental backup
+
+This backup command is similar to the base backup, but adds `SETTINGS base_backup` and the location of the base backup.  Note that the destination for the incremental backup is not the same directory as the base; it is the same endpoint with a different target directory within the bucket.  The base backup is in `my_backup`, and the incremental will be written to `my_incremental`:
+```sql
+BACKUP TABLE data TO S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_incremental', 'ABC123', 'Abc+123') SETTINGS base_backup = S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_backup', 'ABC123', 'Abc+123')
+```
+
+```response
+┌─id───────────────────────────────────┬─status─────────┐
+│ f6cd3900-850f-41c9-94f1-0c4df33ea528 │ BACKUP_CREATED │
+└──────────────────────────────────────┴────────────────┘
+```
+### Restore from the incremental backup
+
+This command restores the incremental backup into a new table, `data3`.  Note that when an incremental backup is restored, the base backup is also included.  Specify only the incremental backup when restoring:
+```sql
+RESTORE TABLE data AS data3 FROM S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_incremental', 'ABC123', 'Abc+123')
+```
+
+```response
+┌─id───────────────────────────────────┬─status───┐
+│ ff0c8c39-7dff-4324-a241-000796de11ca │ RESTORED │
+└──────────────────────────────────────┴──────────┘
+```
+
+### Verify the count
+
+There were two inserts into the original table `data`, one with 1,000 rows and one with 100 rows, for a total of 1,100. Verify that the restored table has 1,100 rows:
+```sql
+SELECT count()
+FROM data3
+```
+```response
+┌─count()─┐
+│    1100 │
+└─────────┘
+```
+
+### Verify the content
+This compares the content of the original table, `data`, with the restored table, `data3`:
+```sql
+SELECT throwIf((
+        SELECT groupArray(tuple(*))
+        FROM data
+    ) != (
+        SELECT groupArray(tuple(*))
+        FROM data3
+    ), 'Data does not match after BACKUP/RESTORE')
+```
+## BACKUP/RESTORE Using an S3 Disk
+
+It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the ClickHouse storage configuration.  Configure the disk like this by adding a file to `/etc/clickhouse-server/config.d`:

 ```xml
 <clickhouse>
--- a/docs/en/operations/_update.md
+++ b/docs/en/operations/_update.md
@ -1,10 +1,7 @@
---
-slug: /en/operations/update
-sidebar_position: 47
-sidebar_label: ClickHouse Upgrade
---

-# ClickHouse Upgrade
+[//]: # (This file is included in Manage > Updates)
+
+## Self-managed ClickHouse Upgrade

 If ClickHouse was installed from `deb` packages, execute the following commands on the server:

--- a/docs/en/operations/clickhouse-keeper.md
+++ b/docs/en/operations/clickhouse-keeper.md
@ -309,7 +309,7 @@ Sessions with Ephemerals (1):
 /clickhouse/task_queue/ddl
 ```

-## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
+## Migration from ZooKeeper {#migration-from-zookeeper}

 Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:

--- a/docs/en/operations/system-tables/information_schema.md
+++ b/docs/en/operations/system-tables/information_schema.md
@ -178,7 +178,7 @@ Columns:
 -   `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view.
 -   `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking.
 -   `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated.
-   `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view/#materialized). Possible values:
+-   `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view.md/#materialized-view). Possible values:
    -   `NO` — The created view is not materialized.
    -   `YES` — The created view is materialized.
 -   `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated.
--- a/docs/en/operations/system-tables/replicated_fetches.md
+++ b/docs/en/operations/system-tables/replicated_fetches.md
@ -68,6 +68,5 @@ thread_id:                   54

 **See Also**

-   [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
+-   [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#managing-replicatedmergetree-tables)

-[Original article](https://clickhouse.com/docs/en/operations/system_tables/replicated_fetches) <!--hide-->
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@ -303,17 +303,25 @@ or
 CREATE DICTIONARY somedict (
    id UInt64,
    first Date,
-    last Date
+    last Date,
+    advertiser_id UInt64
 )
 PRIMARY KEY id
+SOURCE(CLICKHOUSE(TABLE 'date_table'))
+LIFETIME(MIN 1 MAX 1000)
 LAYOUT(RANGE_HASHED())
 RANGE(MIN first MAX last)
 ```

-To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected:
+To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, for which a range is selected:

 ``` sql
-dictGetT('dict_name', 'attr_name', id, date)
+dictGet('dict_name', 'attr_name', id, date)
+```
+Query example:
+
+``` sql
+SELECT dictGet('somedict', 'advertiser_id', 1, '2022-10-20 23:20:10.000'::DateTime64::UInt64);
 ```

 This function returns the value for the specified `id`s and the date range that includes the passed date.
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
@ -14,8 +14,10 @@ Example of a polygon dictionary configuration:
 <dictionary>
    <structure>
        <key>
-            <name>key</name>
-            <type>Array(Array(Array(Array(Float64))))</type>
+            <attribute>
+                <name>key</name>
+                <type>Array(Array(Array(Array(Float64))))</type>
+            </attribute>
        </key>

        <attribute>
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -1068,7 +1068,7 @@ Example:
 SELECT timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600));
 SELECT timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299);
 SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0));
-``` 
+```
 ``` text
 ┌─timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600))─┐
 │ ['2012-01-01 12:00:00','2012-01-01 12:30:00']               │
@ -1244,7 +1244,7 @@ Result:
 └──────────────────────────┘
 ```

-When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type.
+When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and returns [String](../../sql-reference/data-types/string.md#string) type.

 For example:

--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@ -571,13 +571,13 @@ Similar to base58Decode, but returns an empty string in case of error.

 ## base64Encode(s)

-Encodes ‘s’ string into base64
+Encodes ‘s’ FixedString or String into base64.

 Alias: `TO_BASE64`.

 ## base64Decode(s)

-Decode base64-encoded string ‘s’ into original string. In case of failure raises an exception.
+Decodes base64-encoded FixedString or String ‘s’ into original string. In case of failure raises an exception.

 Alias: `FROM_BASE64`.

--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@ -107,7 +107,7 @@ ALTER TABLE visits RENAME COLUMN webBrowser TO browser
 CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name
 ```

-Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](partition.md#how-to-set-partition-expression).

 If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist.

@ -204,8 +204,9 @@ It is used if it is necessary to add or update a column with a complicated expre
 Syntax:

 ```sql
-ALTER TABLE table MATERIALIZE COLUMN col;
+ALTER TABLE [db.]table [ON CLUSTER cluster] MATERIALIZE COLUMN col [IN PARTITION partition | IN PARTITION ID 'partition_id'];
 ```
+- If you specify a PARTITION, the column will be materialized only in the specified partition.

 **Example**

--- a/docs/en/sql-reference/statements/alter/partition.md
+++ b/docs/en/sql-reference/statements/alter/partition.md
@ -39,7 +39,7 @@ ALTER TABLE mt DETACH PARTITION '2020-11-21';
 ALTER TABLE mt DETACH PART 'all_2_2_0';
 ```

-Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).

 After the query is executed, you can do whatever you want with the data in the `detached` directory — delete it from the file system, or just leave it.

@ -53,7 +53,7 @@ ALTER TABLE table_name [ON CLUSTER cluster] DROP PARTITION|PART partition_expr

 Deletes the specified partition from the table. This query tags the partition as inactive and deletes data completely, approximately in 10 minutes.

-Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).

 The query is replicated – it deletes data on all replicas.

@ -71,7 +71,7 @@ ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partiti
 ```

 Removes the specified part or all parts of the specified partition from `detached`.
-Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Read more about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).

 ## ATTACH PARTITION\|PART

@ -86,7 +86,7 @@ ALTER TABLE visits ATTACH PARTITION 201901;
 ALTER TABLE visits ATTACH PART 201901_2_2_0;
 ```

-Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Read more about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).

 This query is replicated. The replica-initiator checks whether there is data in the `detached` directory.
 If data exists, the query checks its integrity. If everything is correct, the query adds the data to the table.
@ -166,7 +166,7 @@ This query creates a local backup of a specified partition. If the `PARTITION` c
 The entire backup process is performed without stopping the server.
 :::

-Note that for old-styled tables you can specify the prefix of the partition name (for example, `2019`) - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Note that for old-styled tables you can specify the prefix of the partition name (for example, `2019`) - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).

 At the time of execution, for a data snapshot, the query creates hardlinks to a table data. Hardlinks are placed in the directory `/var/lib/clickhouse/shadow/N/...`, where:

@ -194,7 +194,7 @@ To restore data from a backup, do the following:

 Restoring from a backup does not require stopping the server.

-For more information about backups and restoring data, see the [Data Backup](../../../operations/backup.md) section.
+For more information about backups and restoring data, see the [Data Backup](/docs/en/manage/backups.mdx) section.

 ## UNFREEZE PARTITION

--- a/docs/en/sql-reference/statements/alter/projection.md
+++ b/docs/en/sql-reference/statements/alter/projection.md
@ -7,18 +7,26 @@ title: "Manipulating Projections"

 The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available:

-   `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata.
+## ADD PROJECTION

-   `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+`ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds a projection description to the table's metadata.

-   `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+## DROP PROJECTION

-   `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+`ALTER TABLE [db].name DROP PROJECTION name` - Removes the projection description from the table's metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+
+## MATERIALIZE PROJECTION
+
+`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+
+## CLEAR PROJECTION
+
+`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).


 The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files.

-Also, they are replicated, syncing projections metadata via ZooKeeper.
+Also, they are replicated, syncing projection metadata via ClickHouse Keeper or ZooKeeper.

 :::note    
 Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
--- a/docs/en/sql-reference/statements/create/database.md
+++ b/docs/en/sql-reference/statements/create/database.md
@ -31,7 +31,7 @@ By default, ClickHouse uses its own [Atomic](../../../engines/database-engines/a

 ### COMMENT

-You can add a comment to the database when you creating it.
+You can add a comment to the database when you are creating it.

 The comment is supported for all database engines.

--- a/docs/en/sql-reference/statements/create/function.md
+++ b/docs/en/sql-reference/statements/create/function.md
@ -4,7 +4,7 @@ sidebar_position: 38
 sidebar_label: FUNCTION
 ---

-# CREATE FUNCTION
+# CREATE FUNCTION &mdash; user defined function (UDF)

 Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls.

--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@ -22,7 +22,7 @@ The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/me
 When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `2`) or on current replica (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `1`).

 -   If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
-   If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](../../sql-reference/statements/alter/index.md#alter-how-to-specify-part-expr).
+-   If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression).
 -   If you specify `FINAL`, optimization is performed even when all the data is already in one part. Also merge is forced even if concurrent merges are performed.
 -   If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine.

--- a/docs/en/sql-reference/statements/select/intersect.md
+++ b/docs/en/sql-reference/statements/select/intersect.md
@ -7,7 +7,7 @@ sidebar_label: INTERSECT

 The `INTERSECT` clause returns only those rows that result from both the first and the second queries. The queries must match the number of columns, order, and type. The result of `INTERSECT` can contain duplicate rows.

-Multiple `INTERSECT` statements are executes left to right if parenthesis are not specified. The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clause.
+Multiple `INTERSECT` statements are executed left to right if parentheses are not specified. The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clauses.


 ``` sql
--- a/docs/ru/engines/table-engines/integrations/kafka.md
+++ b/docs/ru/engines/table-engines/integrations/kafka.md
@ -87,14 +87,15 @@ SETTINGS

 <summary>Устаревший способ создания таблицы</summary>

-    :::note "Attention"
-    Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше.
+:::note "Attention"
+Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше.
+:::

 ``` sql
 Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
      [, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
 ```
-    :::
+
 </details>

 ## Описание {#opisanie}
--- a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
@ -39,9 +39,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

 <summary>Устаревший способ создания таблицы</summary>

-    :::note "Attention"
-    Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
-    :::
+:::note "Attention"
+Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
+:::
+
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
--- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
@ -43,9 +43,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

 <summary>Устаревший способ создания таблицы</summary>

-    :::note "Attention"
-    Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
-    :::
+:::note "Attention"
+Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
+:::
+
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
@ -59,7 +60,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

 -   `sign` — Имя столбца с типом строки: `1` — строка состояния, `-1` — строка отмены состояния.

-        Тип данных столбца — `Int8`.
+    Тип данных столбца — `Int8`.

 </details>

--- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
@ -55,9 +55,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

 <summary>Устаревший способ создания таблицы</summary>

-    :::note "Attention"
-    Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
-    :::
+:::note "Attention"
+Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
+:::
+
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@ -115,9 +115,10 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa

 <summary>Устаревший способ создания таблицы</summary>

-    :::note "Attention"
-    Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
-    :::
+:::note "Attention"
+Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
+:::
+
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
--- a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md
@ -42,9 +42,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

 <summary>Устаревший способ создания таблицы</summary>

-    :::note "Attention"
-    Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
-    :::
+:::note "Attention"
+Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
+:::
+
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@ -316,9 +316,9 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101;
 Возвращается дата.

 :::note "Attention"
-    Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами.
+Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами.
 :::
-    
+
 ## toMonday {#tomonday}

 Округляет дату или дату-с-временем вниз до ближайшего понедельника.
@ -1126,8 +1126,7 @@ SELECT FROM_UNIXTIME(423543535);
 └──────────────────────────┘
 ```

-В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает также, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string).
-
+В случае, когда есть два или три аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md), второй является константной строкой формата, а третий (если указан) — константной строкой с названием временной зоны — функция работает так же, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string).

 Запрос:

--- a/docs/ru/sql-reference/operators/in.md
+++ b/docs/ru/sql-reference/operators/in.md
@ -122,9 +122,9 @@ FROM t_null

 Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса.

-    :::note "Attention"
-    Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`.
-    :::
+:::note "Attention"
+Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`.
+:::
 При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`.

 При использовании `GLOBAL IN` / `GLOBAL JOIN-а`, сначала выполняются все подзапросы для `GLOBAL IN` / `GLOBAL JOIN-ов`, и результаты складываются во временные таблицы. Затем эти временные таблицы передаются на каждый удалённый сервер, и на них выполняются запросы, с использованием этих переданных временных данных.
--- a/programs/copier/Aliases.h
+++ b/programs/copier/Aliases.h
@ -1,6 +1,10 @@
 #pragma once

-#include <Interpreters/Cluster.h>
+#include <base/types.h>
+
+#include <Poco/Util/AbstractConfiguration.h>
+
+#include <utility>

 namespace DB
 {
@ -8,21 +12,4 @@ namespace DB

    using DatabaseAndTableName = std::pair<String, String>;
    using ListOfDatabasesAndTableNames = std::vector<DatabaseAndTableName>;
-
-    /// Hierarchical description of the tasks
-    struct ShardPartitionPiece;
-    struct ShardPartition;
-    struct TaskShard;
-    struct TaskTable;
-    struct TaskCluster;
-    struct ClusterPartition;
-
-    using PartitionPieces = std::vector<ShardPartitionPiece>;
-    using TasksPartition = std::map<String, ShardPartition, std::greater<>>;
-    using ShardInfo = Cluster::ShardInfo;
-    using TaskShardPtr = std::shared_ptr<TaskShard>;
-    using TasksShard = std::vector<TaskShardPtr>;
-    using TasksTable = std::list<TaskTable>;
-    using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
 }
-
--- a/programs/copier/CMakeLists.txt
+++ b/programs/copier/CMakeLists.txt
@ -1,7 +1,13 @@
 set(CLICKHOUSE_COPIER_SOURCES
        "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp"
        "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp")
+        "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp")

 set (CLICKHOUSE_COPIER_LINK
        PRIVATE
--- a/programs/copier/ClusterCopier.h
+++ b/programs/copier/ClusterCopier.h
@ -3,7 +3,8 @@
 #include "Aliases.h"
 #include "Internals.h"
 #include "TaskCluster.h"
-#include "TaskTableAndShard.h"
+#include "TaskShard.h"
+#include "TaskTable.h"
 #include "ShardPartition.h"
 #include "ShardPartitionPiece.h"
 #include "ZooKeeperStaff.h"
--- a/programs/copier/ClusterPartition.h
+++ b/programs/copier/ClusterPartition.h
@ -1,17 +1,22 @@
 #pragma once

-#include "Aliases.h"
+#include <base/types.h>
+#include <map>

 namespace DB
 {
-    /// Contains info about all shards that contain a partition
-    struct ClusterPartition
-    {
-        double elapsed_time_seconds = 0;
-        UInt64 bytes_copied = 0;
-        UInt64 rows_copied = 0;
-        UInt64 blocks_copied = 0;

-        UInt64 total_tries = 0;
-    };
+/// Per-partition info gathered over all shards that contain the partition.
+/// Every field starts at zero.
+struct ClusterPartition
+{
+    double elapsed_time_seconds{0};
+    UInt64 bytes_copied{0};
+    UInt64 rows_copied{0};
+    UInt64 blocks_copied{0};
+
+    UInt64 total_tries{0};
+};
+
+using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
+
 }
--- a/programs/copier/ShardPartition.cpp
+++ b/programs/copier/ShardPartition.cpp
@ -0,0 +1,70 @@
+#include "ShardPartition.h"
+
+#include "TaskShard.h"
+#include "TaskTable.h"
+
+namespace DB
+{
+
+/// Remembers the owning shard and the partition name, and reserves room in `pieces`
+/// for `number_of_splits` elements.
+ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits)
+    : task_shard(parent)
+    , name(std::move(name_quoted_))
+{
+    pieces.reserve(number_of_splits);
+}
+
+/// <partition path>/clean_start
+String ShardPartition::getPartitionCleanStartPath() const
+{
+    const String partition_path = getPartitionPath();
+    return partition_path + "/clean_start";
+}
+
+/// <partition piece path>/clean_start
+String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const
+{
+    assert(current_piece_number < task_shard.task_table.number_of_splits);
+    const String piece_path = getPartitionPiecePath(current_piece_number);
+    return piece_path + "/clean_start";
+}
+
+/// The partition path is produced by the table task that owns the parent shard.
+String ShardPartition::getPartitionPath() const
+{
+    auto & table = task_shard.task_table;
+    return table.getPartitionPath(name);
+}
+
+/// The piece path is produced by the table task as well; the piece number must be
+/// smaller than the table's number of splits.
+String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const
+{
+    assert(current_piece_number < task_shard.task_table.number_of_splits);
+    auto & table = task_shard.task_table;
+    return table.getPartitionPiecePath(name, current_piece_number);
+}
+
+String ShardPartition::getShardStatusPath() const
+{
+    // schema: /<root...>/tables/<table>/<partition>/shards/<shard>
+    // e.g. /root/table_test.hits/201701/shards/1
+    const String shard_number = toString(task_shard.numberInCluster());
+    return getPartitionShardsPath() + "/" + shard_number;
+}
+
+/// <partition path>/shards
+String ShardPartition::getPartitionShardsPath() const
+{
+    const String partition_path = getPartitionPath();
+    return partition_path + "/shards";
+}
+
+/// <partition path>/partition_active_workers
+String ShardPartition::getPartitionActiveWorkersPath() const
+{
+    const String partition_path = getPartitionPath();
+    return partition_path + "/partition_active_workers";
+}
+
+/// <active workers path>/<number of the parent shard in the cluster>
+String ShardPartition::getActiveWorkerPath() const
+{
+    const String shard_number = toString(task_shard.numberInCluster());
+    return getPartitionActiveWorkersPath() + "/" + shard_number;
+}
+
+/// <partition path>/is_dirty
+String ShardPartition::getCommonPartitionIsDirtyPath() const
+{
+    const String partition_path = getPartitionPath();
+    return partition_path + "/is_dirty";
+}
+
+/// <partition path>/is_dirty/cleaned
+String ShardPartition::getCommonPartitionIsCleanedPath() const
+{
+    const String dirty_path = getCommonPartitionIsDirtyPath();
+    return dirty_path + "/cleaned";
+}
+
+}
--- a/programs/copier/ShardPartition.h
+++ b/programs/copier/ShardPartition.h
@ -1,19 +1,23 @@
 #pragma once

-#include "Aliases.h"
-#include "TaskTableAndShard.h"
+#include "ShardPartitionPiece.h"
+
+#include <base/types.h>
+
+#include <map>

 namespace DB
 {

+struct TaskShard;
+
 /// Just destination partition of a shard
 /// I don't know what this comment means.
 /// In short, when we discovered what shards contain currently processing partition,
 /// This class describes a partition (name) that is stored on the shard (parent).
 struct ShardPartition
 {
-    ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10)
-            : task_shard(parent), name(std::move(name_quoted_)) { pieces.reserve(number_of_splits); }
+    ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10);

    String getPartitionPath() const;

@ -45,58 +49,6 @@ struct ShardPartition
    String name;
 };

-inline String ShardPartition::getPartitionCleanStartPath() const
-{
-    return getPartitionPath() + "/clean_start";
-}
-
-inline String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const
-{
-    assert(current_piece_number < task_shard.task_table.number_of_splits);
-    return getPartitionPiecePath(current_piece_number) + "/clean_start";
-}
-
-inline String ShardPartition::getPartitionPath() const
-{
-    return task_shard.task_table.getPartitionPath(name);
-}
-
-inline String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const
-{
-    assert(current_piece_number < task_shard.task_table.number_of_splits);
-    return task_shard.task_table.getPartitionPiecePath(name, current_piece_number);
-}
-
-inline String ShardPartition::getShardStatusPath() const
-{
-    // schema: /<root...>/tables/<table>/<partition>/shards/<shard>
-    // e.g. /root/table_test.hits/201701/shards/1
-    return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster());
-}
-
-inline String ShardPartition::getPartitionShardsPath() const
-{
-    return getPartitionPath() + "/shards";
-}
-
-inline String ShardPartition::getPartitionActiveWorkersPath() const
-{
-    return getPartitionPath() + "/partition_active_workers";
-}
-
-inline String ShardPartition::getActiveWorkerPath() const
-{
-    return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster());
-}
-
-inline String ShardPartition::getCommonPartitionIsDirtyPath() const
-{
-    return getPartitionPath() + "/is_dirty";
-}
-
-inline String ShardPartition::getCommonPartitionIsCleanedPath() const
-{
-    return getCommonPartitionIsDirtyPath() + "/cleaned";
-}
+using TasksPartition = std::map<String, ShardPartition, std::greater<>>;

 }
--- a/programs/copier/ShardPartitionPiece.cpp
+++ b/programs/copier/ShardPartitionPiece.cpp
@ -0,0 +1,64 @@
+#include "ShardPartitionPiece.h"
+
+#include "ShardPartition.h"
+#include "TaskShard.h"
+
+#include <IO/WriteHelpers.h>
+
+namespace DB
+{
+
+/// Stores the piece number and the parent partition; the piece is recorded as absent
+/// when `is_present_piece_` is false.
+ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_)
+    : is_absent_piece(!is_present_piece_)
+    , current_piece_number(current_piece_number_)
+    , shard_partition(parent)
+{
+}
+
+/// <partition path>/piece_<piece number>
+String ShardPartitionPiece::getPartitionPiecePath() const
+{
+    const String piece_suffix = "/piece_" + toString(current_piece_number);
+    return shard_partition.getPartitionPath() + piece_suffix;
+}
+
+/// <piece path>/clean_start
+String ShardPartitionPiece::getPartitionPieceCleanStartPath() const
+{
+    const String piece_path = getPartitionPiecePath();
+    return piece_path + "/clean_start";
+}
+
+/// <piece path>/is_dirty
+String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const
+{
+    const String piece_path = getPartitionPiecePath();
+    return piece_path + "/is_dirty";
+}
+
+/// <piece path>/is_dirty/cleaned
+String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const
+{
+    const String dirty_path = getPartitionPieceIsDirtyPath();
+    return dirty_path + "/cleaned";
+}
+
+/// <piece path>/partition_piece_active_workers
+String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const
+{
+    const String piece_path = getPartitionPiecePath();
+    return piece_path + "/partition_piece_active_workers";
+}
+
+/// <piece active workers path>/<number of the parent shard in the cluster>
+String ShardPartitionPiece::getActiveWorkerPath() const
+{
+    const String shard_number = toString(shard_partition.task_shard.numberInCluster());
+    return getPartitionPieceActiveWorkersPath() + "/" + shard_number;
+}
+
+/// On what shards do we have current partition: <piece path>/shards
+String ShardPartitionPiece::getPartitionPieceShardsPath() const
+{
+    const String piece_path = getPartitionPiecePath();
+    return piece_path + "/shards";
+}
+
+/// <piece shards path>/<number of the parent shard in the cluster>
+String ShardPartitionPiece::getShardStatusPath() const
+{
+    const String shard_number = toString(shard_partition.task_shard.numberInCluster());
+    return getPartitionPieceShardsPath() + "/" + shard_number;
+}
+
+/// <piece path>/is_dirty/cleaner
+String ShardPartitionPiece::getPartitionPieceCleanerPath() const
+{
+    const String dirty_path = getPartitionPieceIsDirtyPath();
+    return dirty_path + "/cleaner";
+}
+
+}
--- a/programs/copier/ShardPartitionPiece.h
+++ b/programs/copier/ShardPartitionPiece.h
@ -1,16 +1,15 @@
 #pragma once

-#include "Internals.h"
+#include <base/types.h>

 namespace DB
 {

+struct ShardPartition;
+
 struct ShardPartitionPiece
 {
-
-    ShardPartitionPiece(ShardPartition &parent, size_t current_piece_number_, bool is_present_piece_)
-            : is_absent_piece(!is_present_piece_), current_piece_number(current_piece_number_),
-              shard_partition(parent) {}
+    ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_);

    String getPartitionPiecePath() const;

@ -37,52 +36,6 @@ struct ShardPartitionPiece
    ShardPartition & shard_partition;
 };

-
-inline String ShardPartitionPiece::getPartitionPiecePath() const
-{
-    return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number);
-}
-
-inline String ShardPartitionPiece::getPartitionPieceCleanStartPath() const
-{
-    return getPartitionPiecePath() + "/clean_start";
-}
-
-inline String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const
-{
-    return getPartitionPiecePath() + "/is_dirty";
-}
-
-inline String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const
-{
-    return getPartitionPieceIsDirtyPath() + "/cleaned";
-}
-
-inline String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const
-{
-    return getPartitionPiecePath() + "/partition_piece_active_workers";
-}
-
-inline String ShardPartitionPiece::getActiveWorkerPath() const
-{
-    return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster());
-}
-
-/// On what shards do we have current partition.
-inline String ShardPartitionPiece::getPartitionPieceShardsPath() const
-{
-    return getPartitionPiecePath() + "/shards";
-}
-
-inline String ShardPartitionPiece::getShardStatusPath() const
-{
-    return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster());
-}
-
-inline String ShardPartitionPiece::getPartitionPieceCleanerPath() const
-{
-    return getPartitionPieceIsDirtyPath() + "/cleaner";
-}
-
+using PartitionPieces = std::vector<ShardPartitionPiece>;

 }
--- a/programs/copier/StatusAccumulator.cpp
+++ b/programs/copier/StatusAccumulator.cpp
@ -0,0 +1,48 @@
+#include "StatusAccumulator.h"
+
+#include <Poco/JSON/Parser.h>
+#include <Poco/JSON/JSON.h>
+#include <Poco/JSON/Object.h>
+#include <Poco/JSON/Stringifier.h>
+
+#include <iostream>
+
+namespace DB
+{
+
+/// Parses `state_json` as a JSON object keyed by table name. Each value is read as a string
+/// and parsed again as a JSON object holding "all_partitions_count" and
+/// "processed_partitions_count"; both counters are added into the map entry for that table.
+StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json)
+{
+    Poco::JSON::Parser json_parser;
+    auto root = json_parser.parse(state_json).extract<Poco::JSON::Object::Ptr>();
+
+    MapPtr accumulated = std::make_shared<Map>();
+    for (const auto & table_name : root->getNames())
+    {
+        auto serialized_status = root->getValue<String>(table_name);
+        auto parsed_status = json_parser.parse(serialized_status).extract<Poco::JSON::Object::Ptr>();
+
+        /// operator[] inserts a value-initialized (zeroed) entry when the table is not in the map yet
+        auto & entry = (*accumulated)[table_name];
+        entry.all_partitions_count += parsed_status->getValue<size_t>("all_partitions_count");
+        entry.processed_partitions_count += parsed_status->getValue<size_t>("processed_partitions_count");
+    }
+    return accumulated;
+}
+
+/// Builds a JSON object with one nested object per table (carrying both counters)
+/// and returns its string form.
+String StatusAccumulator::serializeToJSON(MapPtr statuses)
+{
+    Poco::JSON::Object root;
+    for (const auto & [table_name, table_status] : *statuses)
+    {
+        Poco::JSON::Object entry;
+        entry.set("all_partitions_count", table_status.all_partitions_count);
+        entry.set("processed_partitions_count", table_status.processed_partitions_count);
+
+        root.set(table_name, entry);
+    }
+
+    std::ostringstream out;     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+    out.exceptions(std::ios::failbit);
+    Poco::JSON::Stringifier::stringify(root, out);
+    return out.str();
+}
+
+}
--- a/programs/copier/StatusAccumulator.h
+++ b/programs/copier/StatusAccumulator.h
@ -1,65 +1,27 @@
 #pragma once

+#include <base/types.h>

-#include <Poco/JSON/Parser.h>
-#include <Poco/JSON/JSON.h>
-#include <Poco/JSON/Object.h>
-#include <Poco/JSON/Stringifier.h>
-
-#include <unordered_map>
 #include <memory>
-#include <string>
-#include <iostream>
+#include <unordered_map>

 namespace DB
 {

 class StatusAccumulator
 {
-    public:
-        struct TableStatus
-        {
-            size_t all_partitions_count;
-            size_t processed_partitions_count;
-        };
+public:
+    struct TableStatus
+    {
+        size_t all_partitions_count;
+        size_t processed_partitions_count;
+    };

-        using Map = std::unordered_map<std::string, TableStatus>;
-        using MapPtr = std::shared_ptr<Map>;
+    using Map = std::unordered_map<String, TableStatus>;
+    using MapPtr = std::shared_ptr<Map>;

-        static MapPtr fromJSON(std::string state_json)
-        {
-            Poco::JSON::Parser parser;
-            auto state = parser.parse(state_json).extract<Poco::JSON::Object::Ptr>();
-            MapPtr result_ptr = std::make_shared<Map>();
-            for (const auto & table_name : state->getNames())
-            {
-                auto table_status_json = state->getValue<std::string>(table_name);
-                auto table_status = parser.parse(table_status_json).extract<Poco::JSON::Object::Ptr>();
-                /// Map entry will be created if it is absent
-                auto & map_table_status = (*result_ptr)[table_name];
-                map_table_status.all_partitions_count += table_status->getValue<size_t>("all_partitions_count");
-                map_table_status.processed_partitions_count += table_status->getValue<size_t>("processed_partitions_count");
-            }
-            return result_ptr;
-        }
-
-        static std::string serializeToJSON(MapPtr statuses)
-        {
-            Poco::JSON::Object result_json;
-            for (const auto & [table_name, table_status] : *statuses)
-            {
-                Poco::JSON::Object status_json;
-                status_json.set("all_partitions_count", table_status.all_partitions_count);
-                status_json.set("processed_partitions_count", table_status.processed_partitions_count);
-
-                result_json.set(table_name, status_json);
-            }
-            std::ostringstream oss;     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
-            oss.exceptions(std::ios::failbit);
-            Poco::JSON::Stringifier::stringify(result_json, oss);
-            auto result = oss.str();
-            return result;
-        }
+    static MapPtr fromJSON(String state_json);
+    static String serializeToJSON(MapPtr statuses);
 };

 }
--- a/programs/copier/TaskCluster.cpp
+++ b/programs/copier/TaskCluster.cpp
@ -0,0 +1,74 @@
+#include "TaskCluster.h"
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+/// Stores the task ZooKeeper path and the default local database name exactly as given.
+/// Nothing is read from the configuration here; that is done by loadTasks() and reloadSettings().
+TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
+    : task_zookeeper_path(task_zookeeper_path_)
+    , default_local_database(default_local_database_)
+{}
+
+/// Remembers the config key of the cluster list (`<base_key>.remote_servers`) and creates
+/// one TaskTable for every child key of `<base_key>.tables`.
+/// With an empty base_key both keys are looked up without any prefix.
+/// Throws BAD_ARGUMENTS if the cluster list key is missing from the configuration.
+void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
+{
+    const String key_prefix = base_key.empty() ? "" : base_key + ".";
+    const String tables_prefix = key_prefix + "tables";
+
+    clusters_prefix = key_prefix + "remote_servers";
+    if (!config.has(clusters_prefix))
+        throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS);
+
+    Poco::Util::AbstractConfiguration::Keys table_names;
+    config.keys(tables_prefix, table_names);
+
+    /// Each table task keeps a reference to this cluster task
+    for (const auto & table_name : table_names)
+        table_tasks.emplace_back(*this, config, tables_prefix, table_name);
+}
+
+/// Re-reads max_workers and rebuilds settings_common, settings_pull and settings_push from the configuration.
+/// settings_pull and settings_push start as copies of settings_common and are then overlaid with
+/// their own optional sections; afterwards some settings are forced and a few others get a
+/// default value unless they are marked as changed.
+void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
+{
+    const String key_prefix = base_key.empty() ? "" : base_key + ".";
+
+    max_workers = config.getUInt64(key_prefix + "max_workers");
+
+    /// Applies the section `key_prefix + section_name` to `target` if the configuration has it.
+    auto load_section_if_present = [&] (auto & target, const char * section_name)
+    {
+        const String section_key = key_prefix + section_name;
+        if (config.has(section_key))
+            target.loadSettingsFromConfig(section_key, config);
+    };
+
+    /// Keeps a setting that is marked as changed, otherwise assigns the given default.
+    auto set_default_value = [] (auto && setting, auto && default_value)
+    {
+        setting = setting.changed ? setting.value : default_value;
+    };
+
+    /// Common base, always with prefer_localhost_replica switched off
+    settings_common = Settings();
+    load_section_if_present(settings_common, "settings");
+    settings_common.prefer_localhost_replica = false;
+
+    /// Pull side: copy of the base plus its own section
+    settings_pull = settings_common;
+    load_section_if_present(settings_pull, "settings_pull");
+
+    /// Push side: copy of the base plus its own section
+    settings_push = settings_common;
+    load_section_if_present(settings_push, "settings_push");
+
+    /// Override important settings
+    settings_pull.readonly = 1;
+    settings_pull.prefer_localhost_replica = false;
+    settings_push.insert_distributed_sync = true;
+    settings_push.prefer_localhost_replica = false;
+
+    /// Defaults that the configuration is allowed to override
+    set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
+    set_default_value(settings_pull.max_threads, 1);
+    set_default_value(settings_pull.max_block_size, 8192UL);
+    set_default_value(settings_pull.preferred_block_size_bytes, 0);
+
+    set_default_value(settings_push.insert_distributed_timeout, 0);
+    set_default_value(settings_push.replication_alter_partitions_sync, 2);
+}
+
+}
+
--- a/programs/copier/TaskCluster.h
+++ b/programs/copier/TaskCluster.h
@ -1,21 +1,20 @@
 #pragma once

-#include "Aliases.h"
+#include "TaskTable.h"
+
+#include <Core/Settings.h>
+#include <base/types.h>
+
 #include <Poco/Util/AbstractConfiguration.h>

+#include <random>
+
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}

 struct TaskCluster
 {
-    TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
-            : task_zookeeper_path(task_zookeeper_path_)
-            , default_local_database(default_local_database_)
-    {}
+    TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_);

    void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");

@ -50,61 +49,4 @@ struct TaskCluster
    pcg64 random_engine;
 };

-inline void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
-{
-    String prefix = base_key.empty() ? "" : base_key + ".";
-
-    clusters_prefix = prefix + "remote_servers";
-    if (!config.has(clusters_prefix))
-        throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS);
-
-    Poco::Util::AbstractConfiguration::Keys tables_keys;
-    config.keys(prefix + "tables", tables_keys);
-
-    for (const auto & table_key : tables_keys)
-    {
-        table_tasks.emplace_back(*this, config, prefix + "tables", table_key);
-    }
-}
-
-inline void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
-{
-    String prefix = base_key.empty() ? "" : base_key + ".";
-
-    max_workers = config.getUInt64(prefix + "max_workers");
-
-    settings_common = Settings();
-    if (config.has(prefix + "settings"))
-        settings_common.loadSettingsFromConfig(prefix + "settings", config);
-
-    settings_common.prefer_localhost_replica = 0;
-
-    settings_pull = settings_common;
-    if (config.has(prefix + "settings_pull"))
-        settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config);
-
-    settings_push = settings_common;
-    if (config.has(prefix + "settings_push"))
-        settings_push.loadSettingsFromConfig(prefix + "settings_push", config);
-
-    auto set_default_value = [] (auto && setting, auto && default_value)
-    {
-        setting = setting.changed ? setting.value : default_value;
-    };
-
-    /// Override important settings
-    settings_pull.readonly = 1;
-    settings_pull.prefer_localhost_replica = false;
-    settings_push.insert_distributed_sync = true;
-    settings_push.prefer_localhost_replica = false;
-
-    set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
-    set_default_value(settings_pull.max_threads, 1);
-    set_default_value(settings_pull.max_block_size, 8192UL);
-    set_default_value(settings_pull.preferred_block_size_bytes, 0);
-
-    set_default_value(settings_push.insert_distributed_timeout, 0);
-    set_default_value(settings_push.replication_alter_partitions_sync, 2);
-}
-
 }
--- a/programs/copier/TaskShard.cpp
+++ b/programs/copier/TaskShard.cpp
@ -0,0 +1,37 @@
+#include "TaskShard.h"
+
+#include "TaskTable.h"
+
+namespace DB
+{
+
+/// Binds the shard to its parent table, copies the shard info and fills
+/// list_of_split_tables_on_shard with one empty entry per split of the parent table.
+TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_)
+    : task_table(parent), info(info_)
+{
+    const auto splits_count = task_table.number_of_splits;
+    list_of_split_tables_on_shard.assign(splits_count, DatabaseAndTableName{});
+}
+
+/// Returns info.shard_num unchanged (compare indexInCluster(), which subtracts one).
+UInt32 TaskShard::numberInCluster() const
+{
+    return info.shard_num;
+}
+
+/// Returns the shard number minus one; this is the value used to index getShardsAddresses().
+UInt32 TaskShard::indexInCluster() const
+{
+    return numberInCluster() - 1;
+}
+
+/// Builds a human-readable description of the shard: its number in the cluster, an example
+/// replica host, the quoted pull table and the name of the pull cluster.
+String DB::TaskShard::getDescription() const
+{
+    /// The parenthesis opened after the shard number is closed at the end of the message.
+    return fmt::format("N{} (having a replica {}, pull table {} of cluster {})",
+                       numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name);
+}
+
+/// Returns the readable form of the first replica address of this shard in the pull cluster.
+/// Both lookups go through `.at()`, i.e. they are bounds-checked.
+String DB::TaskShard::getHostNameExample() const
+{
+    const auto & shards_addresses = task_table.cluster_pull->getShardsAddresses();
+    return shards_addresses.at(indexInCluster()).at(0).readableString();
+}
+
+}
--- a/programs/copier/TaskShard.h
+++ b/programs/copier/TaskShard.h
@ -0,0 +1,56 @@
+#pragma once
+
+#include "Aliases.h"
+#include "Internals.h"
+#include "ClusterPartition.h"
+#include "ShardPartition.h"
+
+
+namespace DB
+{
+
+struct TaskTable;
+
+/// State kept for one shard of the pull cluster while a table is being copied.
+/// Holds a reference to the owning TaskTable, so it must not outlive it.
+struct TaskShard
+{
+    /// Also pre-fills list_of_split_tables_on_shard with parent.number_of_splits empty entries
+    TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_);
+
+    TaskTable & task_table;
+
+    Cluster::ShardInfo info;
+
+    /// Returns info.shard_num
+    UInt32 numberInCluster() const;
+
+    /// Returns info.shard_num - 1
+    UInt32 indexInCluster() const;
+
+    /// Human-readable description of the shard (number, example replica, pull table, pull cluster)
+    String getDescription() const;
+
+    /// Readable address of the first replica of this shard in the pull cluster
+    String getHostNameExample() const;
+
+    /// Used to sort shards by their proximity
+    ShardPriority priority;
+
+    /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
+    ColumnWithTypeAndName partition_key_column;
+
+    /// There is a task for each destination partition
+    TasksPartition partition_tasks;
+
+    /// Which partitions have been checked for existence.
+    /// If a partition from this list exists, it is in partition_tasks
+    std::set<String> checked_partitions;
+
+    /// Last CREATE TABLE query of the table of the shard
+    ASTPtr current_pull_table_create_query;
+    ASTPtr current_push_table_create_query;
+
+    /// Internal distributed tables
+    DatabaseAndTableName table_read_shard;
+    DatabaseAndTableName main_table_split_shard;
+    ListOfDatabasesAndTableNames list_of_split_tables_on_shard;
+};
+
+using TaskShardPtr = std::shared_ptr<TaskShard>;
+using TasksShard = std::vector<TaskShardPtr>;
+
+}
--- a/programs/copier/TaskTable.cpp
+++ b/programs/copier/TaskTable.cpp
@ -0,0 +1,221 @@
+#include "TaskTable.h"
+
+#include "ClusterPartition.h"
+#include "TaskCluster.h"
+
+#include <Parsers/ASTFunction.h>
+
+#include <boost/algorithm/string/join.hpp>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
+    extern const int LOGICAL_ERROR;
+}
+
+/// Reads the description of one table copy task from the keys below `<prefix_>.<table_key>.`.
+/// Read without a default: cluster_pull, cluster_push, database_pull, table_pull, database_push,
+/// table_push, sharding_key. Read with a default: number_of_splits (3),
+/// allow_to_copy_alias_and_materialized_columns (false), allow_to_drop_target_partitions (false),
+/// engine, where_condition (empty). enabled_partitions is optional and may be either a
+/// whitespace-separated string or a sequence of child keys starting with "partition".
+/// Throws UNKNOWN_ELEMENT_IN_CONFIG for any other child key of enabled_partitions.
+TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config,
+                     const String & prefix_, const String & table_key)
+        : task_cluster(parent)
+{
+    const String table_prefix = prefix_ + "." + table_key + ".";
+
+    /// Reads `<table_prefix><key>` as a string, without a default value.
+    auto read_string = [&] (const char * key) { return config.getString(table_prefix + key); };
+
+    name_in_config = table_key;
+
+    number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3);
+
+    allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false);
+    allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false);
+
+    cluster_pull_name = read_string("cluster_pull");
+    cluster_push_name = read_string("cluster_push");
+
+    table_pull.first = read_string("database_pull");
+    table_pull.second = read_string("table_pull");
+
+    table_push.first = read_string("database_push");
+    table_push.second = read_string("table_push");
+
+    /// Used as node name in ZooKeeper
+    table_id = escapeForFileName(cluster_push_name)
+               + "." + escapeForFileName(table_push.first)
+               + "." + escapeForFileName(table_push.second);
+
+    engine_push_str = config.getString(table_prefix + "engine", "rand()");
+
+    /// Parse the destination engine and derive partition key, primary key and replication flag from it
+    {
+        ParserStorage storage_parser;
+        engine_push_ast = parseQuery(storage_parser, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+        engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
+        primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", ");
+        is_replicated_table = isReplicatedTableEngine(engine_push_ast);
+    }
+
+    sharding_key_str = read_string("sharding_key");
+
+    /// One Distributed storage for the whole table plus one per piece
+    auxiliary_engine_split_asts.reserve(number_of_splits);
+    {
+        ParserExpressionWithOptionalAlias sharding_key_parser(false);
+        sharding_key_ast = parseQuery(sharding_key_parser, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+        main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
+                                                            sharding_key_ast);
+
+        for (const auto piece_number : collections::range(0, number_of_splits))
+        {
+            const String piece_table_name = table_push.second + "_piece_" + toString(piece_number);
+            auxiliary_engine_split_asts.emplace_back(
+                createASTStorageDistributed(cluster_push_name, table_push.first, piece_table_name, sharding_key_ast));
+        }
+    }
+
+    where_condition_str = config.getString(table_prefix + "where_condition", "");
+    if (!where_condition_str.empty())
+    {
+        ParserExpressionWithOptionalAlias where_parser(false);
+        where_condition_ast = parseQuery(where_parser, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+
+        // Will use canonical expression form
+        where_condition_str = queryToString(where_condition_ast);
+    }
+
+    const String enabled_partitions_prefix = table_prefix + "enabled_partitions";
+    has_enabled_partitions = config.has(enabled_partitions_prefix);
+
+    /// Without the key there is no partition filter and nothing more to read
+    if (!has_enabled_partitions)
+        return;
+
+    Strings partition_keys;
+    config.keys(enabled_partitions_prefix, partition_keys);
+
+    if (partition_keys.empty())
+    {
+        /// Parse list of partition from space-separated string
+        String partitions_str = config.getString(enabled_partitions_prefix);
+        boost::trim_if(partitions_str, isWhitespaceASCII);
+        boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
+    }
+    else
+    {
+        /// Parse sequence of <partition>...</partition>
+        for (const String & key : partition_keys)
+        {
+            if (!startsWith(key, "partition"))
+                throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
+
+            enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
+        }
+    }
+
+    /// Keep a set next to the ordered list
+    enabled_partitions_set.insert(enabled_partitions.begin(), enabled_partitions.end());
+}
+
+
+/// ZooKeeper path of a partition:
+/// <task_zookeeper_path>/tables/<table_id>/<escaped partition name>
+/// (for example .../tables/dst_cluster.merge.hits/201701).
+String TaskTable::getPartitionPath(const String & partition_name) const
+{
+    const auto & task_root = task_cluster.task_zookeeper_path;
+    return task_root + "/tables/" + table_id + "/" + escapeForFileName(partition_name);
+}
+
+/// Path of the "attach_active" node below the partition path.
+String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const
+{
+    const String partition_path = getPartitionPath(partition_name);
+    return partition_path + "/attach_active";
+}
+
+/// Path of the "attach_is_done" node below the partition path.
+String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const
+{
+    const String partition_path = getPartitionPath(partition_name);
+    return partition_path + "/attach_is_done";
+}
+
+/// Path of one piece of a partition: <partition path>/piece_<piece_number>.
+String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const
+{
+    /// Piece numbers are zero-based: 0 ... number_of_splits - 1
+    assert(piece_number < number_of_splits);
+    const String partition_path = getPartitionPath(partition_name);
+    return partition_path + "/piece_" + toString(piece_number);
+}
+
+/// Path of the "is_dirty" node below the partition path.
+String TaskTable::getCertainPartitionIsDirtyPath(const String & partition_name) const
+{
+    const String partition_path = getPartitionPath(partition_name);
+    return partition_path + "/is_dirty";
+}
+
+/// Path of the "is_dirty" node below the path of one partition piece.
+String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const
+{
+    const String piece_path = getPartitionPiecePath(partition_name, piece_number);
+    return piece_path + "/is_dirty";
+}
+
+/// Path of the "cleaned" node, which is a child of the partition's "is_dirty" node.
+String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const
+{
+    const String dirty_path = getCertainPartitionIsDirtyPath(partition_name);
+    return dirty_path + "/cleaned";
+}
+
+/// Path of the "cleaned" node, which is a child of the piece's "is_dirty" node.
+String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const
+{
+    const String dirty_path = getCertainPartitionPieceIsDirtyPath(partition_name, piece_number);
+    return dirty_path + "/cleaned";
+}
+
+/// Path of the "shards" node below the partition path.
+String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const
+{
+    const String partition_path = getPartitionPath(partition_name);
+    return partition_path + "/shards";
+}
+
+/// Path of the "shards" node below the path of one partition piece.
+String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const
+{
+    const String piece_path = getPartitionPiecePath(partition_name, piece_number);
+    return piece_path + "/shards";
+}
+
+/// Returns the flag computed in the constructor by isReplicatedTableEngine(engine_push_ast).
+bool TaskTable::isReplicatedTable() const
+{
+    return is_replicated_table;
+}
+
+/// Path of the status node <task_zookeeper_path>/status/all_partitions_count.
+String TaskTable::getStatusAllPartitionCount() const
+{
+    const auto & task_root = task_cluster.task_zookeeper_path;
+    return task_root + "/status/all_partitions_count";
+}
+
+/// Path of the status node <task_zookeeper_path>/status/processed_partitions_count.
+String TaskTable::getStatusProcessedPartitionsCount() const
+{
+    const auto & task_root = task_cluster.task_zookeeper_path;
+    return task_root + "/status/processed_partitions_count";
+}
+
+/// Returns a copy of engine_push_ast in which the engine is turned into its non-replicated form:
+/// the leading "Replicated" is cut from the engine name and the first two engine arguments
+/// (if present) are removed. All modifications are done on a clone of engine_push_ast.
+ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const
+{
+    ASTPtr prev_engine_push_ast = engine_push_ast->clone();
+
+    auto & new_storage_ast = prev_engine_push_ast->as<ASTStorage &>();
+    auto & new_engine_ast = new_storage_ast.engine->as<ASTFunction &>();
+
+    /// Remove "Replicated" from name
+    /// NOTE(review): assumes the engine name starts with "Replicated" - confirm that callers check isReplicatedTable() first
+    constexpr size_t replicated_prefix_length = sizeof("Replicated") - 1;
+    new_engine_ast.name = new_engine_ast.name.substr(replicated_prefix_length);
+
+    if (new_engine_ast.arguments)
+    {
+        auto & replicated_table_arguments = new_engine_ast.arguments->children;
+
+        /// In some cases of Atomic database engine usage ReplicatedMergeTree tables
+        /// could be created without arguments.
+        /// Delete at most the first two arguments of Replicated...MergeTree() table.
+        /// The emptiness check is repeated before every erase, so a list with a single
+        /// argument no longer leads to an erase on an empty container.
+        for (size_t dropped = 0; dropped < 2 && !replicated_table_arguments.empty(); ++dropped)
+            replicated_table_arguments.erase(replicated_table_arguments.begin());
+    }
+
+    return new_storage_ast.clone();
+}
+
+/// Looks up a partition in cluster_partitions by name and returns a reference to it.
+/// Throws LOGICAL_ERROR if there is no entry with that name.
+ClusterPartition & TaskTable::getClusterPartition(const String & partition_name)
+{
+    const auto found = cluster_partitions.find(partition_name);
+    if (found != cluster_partitions.end())
+        return found->second;
+
+    throw Exception("There are no cluster partition " + partition_name + " in " + table_id,
+                    ErrorCodes::LOGICAL_ERROR);
+}
+
+}
--- a/programs/copier/TaskTable.h
+++ b/programs/copier/TaskTable.h
@ -0,0 +1,173 @@
+#pragma once
+
+#include "Aliases.h"
+#include "TaskShard.h"
+
+
+namespace DB
+{
+
+struct ClusterPartition;
+struct TaskCluster;
+
+/// Description and runtime state of one table copy task
+/// (one child of the `tables` section of the task configuration).
+/// Holds a reference to the owning TaskCluster, so it must not outlive it.
+struct TaskTable
+{
+    /// Reads the task description from the keys below `<prefix>.<table_key>.`
+    TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key);
+
+    TaskCluster & task_cluster;
+
+    /// These functions are used in checkPartitionIsDone() or checkPartitionPieceIsDone()
+    /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc.
+
+    String getPartitionPath(const String & partition_name) const;
+
+    String getPartitionAttachIsActivePath(const String & partition_name) const;
+
+    String getPartitionAttachIsDonePath(const String & partition_name) const;
+
+    String getPartitionPiecePath(const String & partition_name, size_t piece_number) const;
+
+    String getCertainPartitionIsDirtyPath(const String & partition_name) const;
+
+    String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const;
+
+    String getCertainPartitionIsCleanedPath(const String & partition_name) const;
+
+    String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const;
+
+    String getCertainPartitionTaskStatusPath(const String & partition_name) const;
+
+    String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const;
+
+    bool isReplicatedTable() const;
+
+    /// These nodes are used for check-status option
+    String getStatusAllPartitionCount() const;
+    String getStatusProcessedPartitionsCount() const;
+
+    /// Partitions will be split into number-of-splits pieces.
+    /// Each piece will be copied independently. (3 by default, set in the constructor)
+    size_t number_of_splits;
+
+    bool allow_to_copy_alias_and_materialized_columns{false};
+    bool allow_to_drop_target_partitions{false};
+
+    String name_in_config;
+
+    /// Used as task ID
+    String table_id;
+
+    /// Column names in primary key
+    String primary_key_comma_separated;
+
+    /// Source cluster and table
+    String cluster_pull_name;
+    DatabaseAndTableName table_pull;
+
+    /// Destination cluster and table
+    String cluster_push_name;
+    DatabaseAndTableName table_push;
+
+    /// Storage of destination table
+    /// (tables that are stored on each shard of target cluster)
+    String engine_push_str;
+    ASTPtr engine_push_ast;
+    ASTPtr engine_push_partition_key_ast;
+
+    /// First argument of Replicated...MergeTree()
+    String engine_push_zk_path;
+    /// Set in the constructor from isReplicatedTableEngine(engine_push_ast)
+    bool is_replicated_table;
+
+    /// Returns a modified clone of engine_push_ast with the "Replicated" prefix and the first two engine arguments removed
+    ASTPtr rewriteReplicatedCreateQueryToPlain() const;
+
+    /*
+     * A Distributed table definition used to split data
+     * Distributed table will be created on each shard of default
+     * cluster to perform data copying and resharding
+     * */
+    String sharding_key_str;
+    ASTPtr sharding_key_ast;
+    ASTPtr main_engine_split_ast;
+
+    /*
+     * To copy partition piece from one cluster to another we have to use Distributed table.
+     * In case of usage separate table (engine_push) for each partition piece,
+     * we have to use many Distributed tables.
+     * */
+    ASTs auxiliary_engine_split_asts;
+
+    /// Additional WHERE expression to filter input data
+    String where_condition_str;
+    ASTPtr where_condition_ast;
+
+    /// Resolved clusters
+    ClusterPtr cluster_pull;
+    ClusterPtr cluster_push;
+
+    /// Filter partitions that should be copied
+    bool has_enabled_partitions = false;
+    Strings enabled_partitions;
+    NameSet enabled_partitions_set;
+
+    /**
+     * Prioritized list of shards
+     * all_shards contains information about all shards in the table.
+     * So we have to check whether particular shard have current partition or not while processing.
+     */
+    TasksShard all_shards;
+    TasksShard local_shards;
+
+    /// All partitions of the current table.
+    ClusterPartitions cluster_partitions;
+    NameSet finished_cluster_partitions;
+
+    /// Partition names to process in user-specified order
+    Strings ordered_partition_names;
+
+    /// Throws LOGICAL_ERROR if partition_name is not present in cluster_partitions
+    ClusterPartition & getClusterPartition(const String & partition_name);
+
+    Stopwatch watch;
+    UInt64 bytes_copied = 0;
+    UInt64 rows_copied = 0;
+
+    /// Fills all_shards (sorted by priority) and local_shards from cluster_pull
+    template <typename RandomEngine>
+    void initShards(RandomEngine &&random_engine);
+};
+
+using TasksTable = std::list<TaskTable>;
+
+
+/// Creates a TaskShard for every shard of cluster_pull and gives each one a priority computed by
+/// getReplicasPriority() from its replicas, the local host name and a random byte drawn from
+/// random_engine. all_shards is then sorted with ShardPriority::greaterPriority, and the leading
+/// shards whose priority.is_remote is below 1 are copied into local_shards.
+template<typename RandomEngine>
+inline void TaskTable::initShards(RandomEngine && random_engine)
+{
+    const String & fqdn_name = getFQDNOrHostName();
+
+    /// std::uniform_int_distribution is only specified for short, int, long, long long and their
+    /// unsigned counterparts; 8-bit types are not permitted. Draw an unsigned int restricted to
+    /// the UInt8 range and narrow it explicitly instead.
+    std::uniform_int_distribution<unsigned int> get_urand(0, std::numeric_limits<UInt8>::max());
+
+    // Compute the priority
+    for (const auto & shard_info : cluster_pull->getShardsInfo())
+    {
+        TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
+        const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
+        /// The drawn value never exceeds the UInt8 maximum, so the cast does not lose information
+        const auto random_byte = static_cast<uint8_t>(get_urand(random_engine));
+        task_shard->priority = getReplicasPriority(replicas, fqdn_name, random_byte);
+
+        all_shards.emplace_back(task_shard);
+    }
+
+    // Sort by priority
+    std::sort(all_shards.begin(), all_shards.end(),
+              [](const TaskShardPtr & lhs, const TaskShardPtr & rhs)
+              {
+                  return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
+              });
+
+    // Cut local shards
+    /// NOTE(review): lower_bound relies on the sort above placing every shard with is_remote < 1 first - confirm against ShardPriority::greaterPriority
+    auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
+                                            [](const TaskShardPtr & lhs, UInt8 is_remote)
+                                            {
+                                                return lhs->priority.is_remote < is_remote;
+                                            });
+
+    local_shards.assign(all_shards.begin(), it_first_remote);
+}
+
+}
--- a/programs/copier/TaskTableAndShard.h
+++ b/programs/copier/TaskTableAndShard.h
@ -1,434 +0,0 @@
-#pragma once
-
-#include "Aliases.h"
-#include "Internals.h"
-#include "ClusterPartition.h"
-
-#include <Core/Defines.h>
-#include <Parsers/ASTFunction.h>
-
-#include <base/map.h>
-#include <boost/algorithm/string/join.hpp>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
-    extern const int LOGICAL_ERROR;
-}
-
-struct TaskShard;
-
-struct TaskTable
-{
-    TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix,
-              const String & table_key);
-
-    TaskCluster & task_cluster;
-
-    /// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone()
-    /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc.
-
-    String getPartitionPath(const String & partition_name) const;
-
-    String getPartitionAttachIsActivePath(const String & partition_name) const;
-
-    String getPartitionAttachIsDonePath(const String & partition_name) const;
-
-    String getPartitionPiecePath(const String & partition_name, size_t piece_number) const;
-
-    String getCertainPartitionIsDirtyPath(const String & partition_name) const;
-
-    String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const;
-
-    String getCertainPartitionIsCleanedPath(const String & partition_name) const;
-
-    String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const;
-
-    String getCertainPartitionTaskStatusPath(const String & partition_name) const;
-
-    String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const;
-
-    bool isReplicatedTable() const { return is_replicated_table; }
-
-    /// These nodes are used for check-status option
-    String getStatusAllPartitionCount() const;
-    String getStatusProcessedPartitionsCount() const;
-
-    /// Partitions will be split into number-of-splits pieces.
-    /// Each piece will be copied independently. (10 by default)
-    size_t number_of_splits;
-
-    bool allow_to_copy_alias_and_materialized_columns{false};
-    bool allow_to_drop_target_partitions{false};
-
-    String name_in_config;
-
-    /// Used as task ID
-    String table_id;
-
-    /// Column names in primary key
-    String primary_key_comma_separated;
-
-    /// Source cluster and table
-    String cluster_pull_name;
-    DatabaseAndTableName table_pull;
-
-    /// Destination cluster and table
-    String cluster_push_name;
-    DatabaseAndTableName table_push;
-
-    /// Storage of destination table
-    /// (tables that are stored on each shard of target cluster)
-    String engine_push_str;
-    ASTPtr engine_push_ast;
-    ASTPtr engine_push_partition_key_ast;
-
-    /// First argument of Replicated...MergeTree()
-    String engine_push_zk_path;
-    bool is_replicated_table;
-
-    ASTPtr rewriteReplicatedCreateQueryToPlain() const;
-
-    /*
-     * A Distributed table definition used to split data
-     * Distributed table will be created on each shard of default
-     * cluster to perform data copying and resharding
-     * */
-    String sharding_key_str;
-    ASTPtr sharding_key_ast;
-    ASTPtr main_engine_split_ast;
-
-    /*
-     * To copy partition piece form one cluster to another we have to use Distributed table.
-     * In case of usage separate table (engine_push) for each partition piece,
-     * we have to use many Distributed tables.
-     * */
-    ASTs auxiliary_engine_split_asts;
-
-    /// Additional WHERE expression to filter input data
-    String where_condition_str;
-    ASTPtr where_condition_ast;
-
-    /// Resolved clusters
-    ClusterPtr cluster_pull;
-    ClusterPtr cluster_push;
-
-    /// Filter partitions that should be copied
-    bool has_enabled_partitions = false;
-    Strings enabled_partitions;
-    NameSet enabled_partitions_set;
-
-    /**
-     * Prioritized list of shards
-     * all_shards contains information about all shards in the table.
-     * So we have to check whether particular shard have current partition or not while processing.
-     */
-    TasksShard all_shards;
-    TasksShard local_shards;
-
-    /// All partitions of the current table.
-    ClusterPartitions cluster_partitions;
-    NameSet finished_cluster_partitions;
-
-    /// Partition names to process in user-specified order
-    Strings ordered_partition_names;
-
-    ClusterPartition & getClusterPartition(const String & partition_name)
-    {
-        auto it = cluster_partitions.find(partition_name);
-        if (it == cluster_partitions.end())
-            throw Exception("There are no cluster partition " + partition_name + " in " + table_id,
-                            ErrorCodes::LOGICAL_ERROR);
-        return it->second;
-    }
-
-    Stopwatch watch;
-    UInt64 bytes_copied = 0;
-    UInt64 rows_copied = 0;
-
-    template <typename RandomEngine>
-    void initShards(RandomEngine &&random_engine);
-};
-
-
-struct TaskShard
-{
-    TaskShard(TaskTable & parent, const ShardInfo & info_) : task_table(parent), info(info_)
-    {
-        list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName());
-    }
-
-    TaskTable & task_table;
-
-    ShardInfo info;
-
-    UInt32 numberInCluster() const { return info.shard_num; }
-
-    UInt32 indexInCluster() const { return info.shard_num - 1; }
-
-    String getDescription() const;
-
-    String getHostNameExample() const;
-
-    /// Used to sort clusters by their proximity
-    ShardPriority priority;
-
-    /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
-    ColumnWithTypeAndName partition_key_column;
-
-    /// There is a task for each destination partition
-    TasksPartition partition_tasks;
-
-    /// Which partitions have been checked for existence
-    /// If some partition from this lists is exists, it is in partition_tasks
-    std::set<String> checked_partitions;
-
-    /// Last CREATE TABLE query of the table of the shard
-    ASTPtr current_pull_table_create_query;
-    ASTPtr current_push_table_create_query;
-
-    /// Internal distributed tables
-    DatabaseAndTableName table_read_shard;
-    DatabaseAndTableName main_table_split_shard;
-    ListOfDatabasesAndTableNames list_of_split_tables_on_shard;
-};
-
-
-inline String TaskTable::getPartitionPath(const String & partition_name) const
-{
-    return task_cluster.task_zookeeper_path             // root
-           + "/tables/" + table_id                      // tables/dst_cluster.merge.hits
-           + "/" + escapeForFileName(partition_name);   // 201701
-}
-
-inline String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const
-{
-    return getPartitionPath(partition_name) + "/attach_active";
-}
-
-inline String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const
-{
-    return getPartitionPath(partition_name) + "/attach_is_done";
-}
-
-inline String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const
-{
-    assert(piece_number < number_of_splits);
-    return getPartitionPath(partition_name) + "/piece_" + toString(piece_number);  // 1...number_of_splits
-}
-
-inline String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const
-{
-    return getPartitionPath(partition_name) + "/is_dirty";
-}
-
-inline String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const
-{
-    return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty";
-}
-
-inline String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const
-{
-    return getCertainPartitionIsDirtyPath(partition_name) + "/cleaned";
-}
-
-inline String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const
-{
-    return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned";
-}
-
-inline String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const
-{
-    return getPartitionPath(partition_name) + "/shards";
-}
-
-inline String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const
-{
-    return getPartitionPiecePath(partition_name, piece_number) + "/shards";
-}
-
-inline String TaskTable::getStatusAllPartitionCount() const
-{
-    return task_cluster.task_zookeeper_path + "/status/all_partitions_count";
-}
-
-inline String TaskTable::getStatusProcessedPartitionsCount() const
-{
-    return task_cluster.task_zookeeper_path + "/status/processed_partitions_count";
-}
-
-inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config,
-                     const String & prefix_, const String & table_key)
-        : task_cluster(parent)
-{
-    String table_prefix = prefix_ + "." + table_key + ".";
-
-    name_in_config = table_key;
-
-    number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3);
-
-    allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false);
-    allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false);
-
-    cluster_pull_name = config.getString(table_prefix + "cluster_pull");
-    cluster_push_name = config.getString(table_prefix + "cluster_push");
-
-    table_pull.first = config.getString(table_prefix + "database_pull");
-    table_pull.second = config.getString(table_prefix + "table_pull");
-
-    table_push.first = config.getString(table_prefix + "database_push");
-    table_push.second = config.getString(table_prefix + "table_push");
-
-    /// Used as node name in ZooKeeper
-    table_id = escapeForFileName(cluster_push_name)
-               + "." + escapeForFileName(table_push.first)
-               + "." + escapeForFileName(table_push.second);
-
-    engine_push_str = config.getString(table_prefix + "engine", "rand()");
-
-    {
-        ParserStorage parser_storage;
-        engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-        engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
-        primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", ");
-        is_replicated_table = isReplicatedTableEngine(engine_push_ast);
-    }
-
-    sharding_key_str = config.getString(table_prefix + "sharding_key");
-
-    auxiliary_engine_split_asts.reserve(number_of_splits);
-    {
-        ParserExpressionWithOptionalAlias parser_expression(false);
-        sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-        main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
-                                                            sharding_key_ast);
-
-        for (const auto piece_number : collections::range(0, number_of_splits))
-        {
-            auxiliary_engine_split_asts.emplace_back
-                    (
-                            createASTStorageDistributed(cluster_push_name, table_push.first,
-                                                        table_push.second + "_piece_" + toString(piece_number), sharding_key_ast)
-                    );
-        }
-    }
-
-    where_condition_str = config.getString(table_prefix + "where_condition", "");
-    if (!where_condition_str.empty())
-    {
-        ParserExpressionWithOptionalAlias parser_expression(false);
-        where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-
-        // Will use canonical expression form
-        where_condition_str = queryToString(where_condition_ast);
-    }
-
-    String enabled_partitions_prefix = table_prefix + "enabled_partitions";
-    has_enabled_partitions = config.has(enabled_partitions_prefix);
-
-    if (has_enabled_partitions)
-    {
-        Strings keys;
-        config.keys(enabled_partitions_prefix, keys);
-
-        if (keys.empty())
-        {
-            /// Parse list of partition from space-separated string
-            String partitions_str = config.getString(table_prefix + "enabled_partitions");
-            boost::trim_if(partitions_str, isWhitespaceASCII);
-            boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
-        }
-        else
-        {
-            /// Parse sequence of <partition>...</partition>
-            for (const String &key : keys)
-            {
-                if (!startsWith(key, "partition"))
-                    throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
-
-                enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
-            }
-        }
-
-        std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin()));
-    }
-}
-
-template<typename RandomEngine>
-inline void TaskTable::initShards(RandomEngine && random_engine)
-{
-    const String & fqdn_name = getFQDNOrHostName();
-    std::uniform_int_distribution<UInt8> get_urand(0, std::numeric_limits<UInt8>::max());
-
-    // Compute the priority
-    for (const auto & shard_info : cluster_pull->getShardsInfo())
-    {
-        TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
-        const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
-        task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine));
-
-        all_shards.emplace_back(task_shard);
-    }
-
-    // Sort by priority
-    std::sort(all_shards.begin(), all_shards.end(),
-              [](const TaskShardPtr & lhs, const TaskShardPtr & rhs)
-              {
-                  return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
-              });
-
-    // Cut local shards
-    auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
-                                            [](const TaskShardPtr & lhs, UInt8 is_remote)
-                                            {
-                                                return lhs->priority.is_remote < is_remote;
-                                            });
-
-    local_shards.assign(all_shards.begin(), it_first_remote);
-}
-
-inline ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const
-{
-    ASTPtr prev_engine_push_ast = engine_push_ast->clone();
-
-    auto & new_storage_ast = prev_engine_push_ast->as<ASTStorage &>();
-    auto & new_engine_ast = new_storage_ast.engine->as<ASTFunction &>();
-
-    /// Remove "Replicated" from name
-    new_engine_ast.name = new_engine_ast.name.substr(10);
-
-    if (new_engine_ast.arguments)
-    {
-        auto & replicated_table_arguments = new_engine_ast.arguments->children;
-
-
-        /// In some cases of Atomic database engine usage ReplicatedMergeTree tables
-        /// could be created without arguments.
-        if (!replicated_table_arguments.empty())
-        {
-            /// Delete first two arguments of Replicated...MergeTree() table.
-            replicated_table_arguments.erase(replicated_table_arguments.begin());
-            replicated_table_arguments.erase(replicated_table_arguments.begin());
-        }
-    }
-
-    return new_storage_ast.clone();
-}
-
-
-inline String DB::TaskShard::getDescription() const
-{
-    return fmt::format("N{} (having a replica {}, pull table {} of cluster {}",
-                       numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name);
-}
-
-inline String DB::TaskShard::getHostNameExample() const
-{
-    const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster());
-    return replicas.at(0).readableString();
-}
-
-}
--- a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp
+++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp
@ -10,23 +10,34 @@ namespace DB
 namespace
 {

-struct QueryTreeNodeHash
+struct QueryTreeNodeWithHash
 {
-    size_t operator()(const IQueryTreeNode * node) const
+    explicit QueryTreeNodeWithHash(const IQueryTreeNode * node_)
+        : node(node_)
+        , hash(node->getTreeHash().first)
+    {}
+
+    const IQueryTreeNode * node = nullptr;
+    size_t hash = 0;
+};
+
+struct QueryTreeNodeWithHashHash
+{
+    size_t operator()(const QueryTreeNodeWithHash & node_with_hash) const
    {
-        return node->getTreeHash().first;
+        return node_with_hash.hash;
    }
 };

-struct QueryTreeNodeEqualTo
+struct QueryTreeNodeWithHashEqualTo
 {
-    size_t operator()(const IQueryTreeNode * lhs_node, const IQueryTreeNode * rhs_node) const
+    bool operator()(const QueryTreeNodeWithHash & lhs_node, const QueryTreeNodeWithHash & rhs_node) const
    {
-        return lhs_node->isEqual(*rhs_node);
+        return lhs_node.hash == rhs_node.hash && lhs_node.node->isEqual(*rhs_node.node);
    }
 };

-using QueryTreeNodeSet = std::unordered_set<const IQueryTreeNode *, QueryTreeNodeHash, QueryTreeNodeEqualTo>;
+using QueryTreeNodeWithHashSet = std::unordered_set<QueryTreeNodeWithHash, QueryTreeNodeWithHashHash, QueryTreeNodeWithHashEqualTo>;

 class OrderByLimitByDuplicateEliminationVisitor : public InDepthQueryTreeVisitor<OrderByLimitByDuplicateEliminationVisitor>
 {
@ -82,7 +93,7 @@ public:
    }

 private:
-    QueryTreeNodeSet unique_expressions_nodes_set;
+    QueryTreeNodeWithHashSet unique_expressions_nodes_set;
 };

 }
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -1621,34 +1621,7 @@ void QueryAnalyzer::validateTableExpressionModifiers(const QueryTreeNodePtr & ta
        table_expression_node->formatASTForErrorMessage(),
        scope.scope_node->formatASTForErrorMessage());

-    if (query_node || union_node)
-    {
-        auto table_expression_modifiers = query_node ? query_node->getTableExpressionModifiers() : union_node->getTableExpressionModifiers();
-
-        if (table_expression_modifiers.has_value())
-        {
-            String table_expression_modifiers_error_message;
-
-            if (table_expression_modifiers->hasFinal())
-            {
-                table_expression_modifiers_error_message += "FINAL";
-
-                if (table_expression_modifiers->hasSampleSizeRatio())
-                    table_expression_modifiers_error_message += ", SAMPLE";
-            }
-            else if (table_expression_modifiers->hasSampleSizeRatio())
-            {
-                table_expression_modifiers_error_message += "SAMPLE";
-            }
-
-            throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
-                "Table expression modifiers {} are not supported for subquery {}. In scope {}",
-                table_expression_modifiers_error_message,
-                table_expression_node->formatASTForErrorMessage(),
-                scope.scope_node->formatASTForErrorMessage());
-        }
-    }
-    else if (table_node || table_function_node)
+    if (table_node || table_function_node)
    {
        auto table_expression_modifiers = table_node ? table_node->getTableExpressionModifiers() : table_function_node->getTableExpressionModifiers();

@ -4661,17 +4634,23 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod

                auto table_expression_modifiers = from_table_identifier.getTableExpressionModifiers();

-                if (auto * resolved_identifier_query_node = resolved_identifier->as<QueryNode>())
+                auto * resolved_identifier_query_node = resolved_identifier->as<QueryNode>();
+                auto * resolved_identifier_union_node = resolved_identifier->as<UnionNode>();
+
+                if (resolved_identifier_query_node || resolved_identifier_union_node)
                {
-                    resolved_identifier_query_node->setIsCTE(false);
+                    if (resolved_identifier_query_node)
+                        resolved_identifier_query_node->setIsCTE(false);
+                    else
+                        resolved_identifier_union_node->setIsCTE(false);
+
                    if (table_expression_modifiers.has_value())
-                        resolved_identifier_query_node->setTableExpressionModifiers(*table_expression_modifiers);
-                }
-                else if (auto * resolved_identifier_union_node = resolved_identifier->as<UnionNode>())
-                {
-                    resolved_identifier_union_node->setIsCTE(false);
-                    if (table_expression_modifiers.has_value())
-                        resolved_identifier_union_node->setTableExpressionModifiers(*table_expression_modifiers);
+                    {
+                        throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
+                            "Table expression modifiers {} are not supported for subquery {}",
+                            table_expression_modifiers->formatForErrorMessage(),
+                            resolved_identifier->formatASTForErrorMessage());
+                    }
                }
                else if (auto * resolved_identifier_table_node = resolved_identifier->as<TableNode>())
                {
--- a/src/Analyzer/QueryNode.cpp
+++ b/src/Analyzer/QueryNode.cpp
@ -74,12 +74,6 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
        buffer << ", constant_value_type: " << constant_value->getType()->getName();
    }

-    if (table_expression_modifiers)
-    {
-        buffer << ", ";
-        table_expression_modifiers->dump(buffer);
-    }
-
    if (hasWith())
    {
        buffer << '\n' << std::string(indent + 2, ' ') << "WITH\n";
@ -195,13 +189,6 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
    else if (!constant_value && rhs_typed.constant_value)
        return false;

-    if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
-        return false;
-    else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
-        return false;
-    else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
-        return false;
-
    return is_subquery == rhs_typed.is_subquery &&
        is_cte == rhs_typed.is_cte &&
        cte_name == rhs_typed.cte_name &&
@ -250,9 +237,6 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
        state.update(constant_value_type_name.size());
        state.update(constant_value_type_name);
    }
-
-    if (table_expression_modifiers)
-        table_expression_modifiers->updateTreeHash(state);
 }

 QueryTreeNodePtr QueryNode::cloneImpl() const
@ -270,7 +254,6 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
    result_query_node->cte_name = cte_name;
    result_query_node->projection_columns = projection_columns;
    result_query_node->constant_value = constant_value;
-    result_query_node->table_expression_modifiers = table_expression_modifiers;

    return result_query_node;
 }
--- a/src/Analyzer/QueryNode.h
+++ b/src/Analyzer/QueryNode.h
@ -176,24 +176,6 @@ public:
        is_group_by_with_grouping_sets = is_group_by_with_grouping_sets_value;
    }

-    /// Return true if query node has table expression modifiers, false otherwise
-    bool hasTableExpressionModifiers() const
-    {
-        return table_expression_modifiers.has_value();
-    }
-
-    /// Get table expression modifiers
-    const std::optional<TableExpressionModifiers> & getTableExpressionModifiers() const
-    {
-        return table_expression_modifiers;
-    }
-
-    /// Set table expression modifiers
-    void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
-    {
-        table_expression_modifiers = std::move(table_expression_modifiers_value);
-    }
-
    /// Returns true if query node WITH section is not empty, false otherwise
    bool hasWith() const
    {
@ -602,7 +584,6 @@ private:
    std::string cte_name;
    NamesAndTypes projection_columns;
    ConstantValuePtr constant_value;
-    std::optional<TableExpressionModifiers> table_expression_modifiers;
    SettingsChanges settings_changes;

    static constexpr size_t with_child_index = 0;
--- a/src/Analyzer/QueryTreeBuilder.cpp
+++ b/src/Analyzer/QueryTreeBuilder.cpp
@ -145,12 +145,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr &
    if (select_lists.children.size() == 1)
        return buildSelectOrUnionExpression(select_lists.children[0], is_subquery, cte_name);

-    auto union_node = std::make_shared<UnionNode>();
+    auto union_node = std::make_shared<UnionNode>(select_with_union_query_typed.union_mode);
    union_node->setIsSubquery(is_subquery);
    union_node->setIsCTE(!cte_name.empty());
    union_node->setCTEName(cte_name);
-    union_node->setUnionMode(select_with_union_query_typed.union_mode);
-    union_node->setUnionModes(select_with_union_query_typed.list_of_modes);
    union_node->setOriginalAST(select_with_union_query);

    size_t select_lists_children_size = select_lists.children.size();
@ -173,23 +171,22 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr
    if (select_lists.size() == 1)
        return buildSelectExpression(select_lists[0], is_subquery, cte_name);

-    auto union_node = std::make_shared<UnionNode>();
-    union_node->setIsSubquery(is_subquery);
-    union_node->setIsCTE(!cte_name.empty());
-    union_node->setCTEName(cte_name);
-
+    SelectUnionMode union_mode;
    if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_ALL)
-        union_node->setUnionMode(SelectUnionMode::INTERSECT_ALL);
+        union_mode = SelectUnionMode::INTERSECT_ALL;
    else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_DISTINCT)
-        union_node->setUnionMode(SelectUnionMode::INTERSECT_DISTINCT);
+        union_mode = SelectUnionMode::INTERSECT_DISTINCT;
    else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_ALL)
-        union_node->setUnionMode(SelectUnionMode::EXCEPT_ALL);
+        union_mode = SelectUnionMode::EXCEPT_ALL;
    else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_DISTINCT)
-        union_node->setUnionMode(SelectUnionMode::EXCEPT_DISTINCT);
+        union_mode = SelectUnionMode::EXCEPT_DISTINCT;
    else
        throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized");

-    union_node->setUnionModes(SelectUnionModes(select_lists.size() - 1, union_node->getUnionMode()));
+    auto union_node = std::make_shared<UnionNode>(union_mode);
+    union_node->setIsSubquery(is_subquery);
+    union_node->setIsCTE(!cte_name.empty());
+    union_node->setCTEName(cte_name);
    union_node->setOriginalAST(select_intersect_except_query);

    size_t select_lists_size = select_lists.size();
@ -676,14 +673,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select

                if (table_expression_modifiers)
                {
-                    if (auto * query_node = node->as<QueryNode>())
-                        query_node->setTableExpressionModifiers(*table_expression_modifiers);
-                    else if (auto * union_node = node->as<UnionNode>())
-                        union_node->setTableExpressionModifiers(*table_expression_modifiers);
-                    else
-                        throw Exception(ErrorCodes::LOGICAL_ERROR,
-                            "Unexpected table expression subquery node. Expected union or query. Actual {}",
-                            node->formatASTForErrorMessage());
+                    throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
+                        "Table expression modifiers {} are not supported for subquery {}",
+                        table_expression_modifiers->formatForErrorMessage(),
+                        node->formatASTForErrorMessage());
                }

                table_expressions.push_back(std::move(node));
--- a/src/Analyzer/TableExpressionModifiers.cpp
+++ b/src/Analyzer/TableExpressionModifiers.cpp
@ -5,6 +5,7 @@
 #include <IO/WriteBuffer.h>
 #include <IO/WriteHelpers.h>
 #include <IO/Operators.h>
+#include <IO/WriteBufferFromString.h>

 namespace DB
 {
@ -39,4 +40,27 @@ void TableExpressionModifiers::updateTreeHash(SipHash & hash_state) const
    }
 }

+String TableExpressionModifiers::formatForErrorMessage() const
+{
+    WriteBufferFromOwnString buffer;
+    if (has_final)
+        buffer << "FINAL";
+
+    if (sample_size_ratio)
+    {
+        if (has_final)
+            buffer << ' ';
+        buffer << "SAMPLE " << ASTSampleRatio::toString(*sample_size_ratio);
+    }
+
+    if (sample_offset_ratio)
+    {
+        if (has_final || sample_size_ratio)
+            buffer << ' ';
+        buffer << "OFFSET " << ASTSampleRatio::toString(*sample_offset_ratio);
+    }
+
+    return buffer.str();
+}
+
 }
--- a/src/Analyzer/TableExpressionModifiers.h
+++ b/src/Analyzer/TableExpressionModifiers.h
@ -58,6 +58,9 @@ public:
    /// Update tree hash
    void updateTreeHash(SipHash & hash_state) const;

+    /// Format for error message
+    String formatForErrorMessage() const;
+
 private:
    bool has_final = false;
    std::optional<Rational> sample_size_ratio;
--- a/src/Analyzer/UnionNode.cpp
+++ b/src/Analyzer/UnionNode.cpp
@ -30,11 +30,18 @@ namespace DB
 namespace ErrorCodes
 {
    extern const int TYPE_MISMATCH;
+    extern const int BAD_ARGUMENTS;
 }

-UnionNode::UnionNode()
+UnionNode::UnionNode(SelectUnionMode union_mode_)
    : IQueryTreeNode(children_size)
+    , union_mode(union_mode_)
 {
+    if (union_mode == SelectUnionMode::UNION_DEFAULT ||
+        union_mode == SelectUnionMode::EXCEPT_DEFAULT ||
+        union_mode == SelectUnionMode::INTERSECT_DEFAULT)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode {} must be normalized", toString(union_mode));
+
    children[queries_child_index] = std::make_shared<ListNode>();
 }

@ -101,28 +108,8 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
        buffer << ", constant_value_type: " << constant_value->getType()->getName();
    }

-    if (table_expression_modifiers)
-    {
-        buffer << ", ";
-        table_expression_modifiers->dump(buffer);
-    }
-
    buffer << ", union_mode: " << toString(union_mode);

-    size_t union_modes_size = union_modes.size();
-    buffer << '\n' << std::string(indent + 2, ' ') << "UNION MODES " << union_modes_size << '\n';
-
-    for (size_t i = 0; i < union_modes_size; ++i)
-    {
-        buffer << std::string(indent + 4, ' ');
-
-        auto query_union_mode = union_modes[i];
-        buffer << toString(query_union_mode);
-
-        if (i + 1 != union_modes_size)
-            buffer << '\n';
-    }
-
    buffer << '\n' << std::string(indent + 2, ' ') << "QUERIES\n";
    getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4);
 }
@ -137,15 +124,8 @@ bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const
    else if (!constant_value && rhs_typed.constant_value)
        return false;

-    if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
-        return false;
-    else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
-        return false;
-    else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
-        return false;
-
    return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name &&
-        union_mode == rhs_typed.union_mode && union_modes == rhs_typed.union_modes;
+        union_mode == rhs_typed.union_mode;
 }

 void UnionNode::updateTreeHashImpl(HashState & state) const
@ -158,10 +138,6 @@ void UnionNode::updateTreeHashImpl(HashState & state) const

    state.update(static_cast<size_t>(union_mode));

-    state.update(union_modes.size());
-    for (const auto & query_union_mode : union_modes)
-        state.update(static_cast<size_t>(query_union_mode));
-
    if (constant_value)
    {
        auto constant_dump = applyVisitor(FieldVisitorToString(), constant_value->getValue());
@ -172,23 +148,16 @@ void UnionNode::updateTreeHashImpl(HashState & state) const
        state.update(constant_value_type_name.size());
        state.update(constant_value_type_name);
    }
-
-    if (table_expression_modifiers)
-        table_expression_modifiers->updateTreeHash(state);
 }

 QueryTreeNodePtr UnionNode::cloneImpl() const
 {
-    auto result_union_node = std::make_shared<UnionNode>();
+    auto result_union_node = std::make_shared<UnionNode>(union_mode);

    result_union_node->is_subquery = is_subquery;
    result_union_node->is_cte = is_cte;
    result_union_node->cte_name = cte_name;
-    result_union_node->union_mode = union_mode;
-    result_union_node->union_modes = union_modes;
-    result_union_node->union_modes_set = union_modes_set;
    result_union_node->constant_value = constant_value;
-    result_union_node->table_expression_modifiers = table_expression_modifiers;

    return result_union_node;
 }
@ -197,14 +166,7 @@ ASTPtr UnionNode::toASTImpl() const
 {
    auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
    select_with_union_query->union_mode = union_mode;
-
-    if (union_mode != SelectUnionMode::UNION_DEFAULT &&
-        union_mode != SelectUnionMode::EXCEPT_DEFAULT &&
-        union_mode != SelectUnionMode::INTERSECT_DEFAULT)
-        select_with_union_query->is_normalized = true;
-
-    select_with_union_query->list_of_modes = union_modes;
-    select_with_union_query->set_of_modes = union_modes_set;
+    select_with_union_query->is_normalized = true;
    select_with_union_query->children.push_back(getQueriesNode()->toAST());
    select_with_union_query->list_of_selects = select_with_union_query->children.back();

--- a/src/Analyzer/UnionNode.h
+++ b/src/Analyzer/UnionNode.h
@ -19,6 +19,7 @@ namespace ErrorCodes
 }

 /** Union node represents union of queries in query tree.
+  * Union node must be initialized with normalized union mode.
  *
  * Example: (SELECT id FROM test_table) UNION ALL (SELECT id FROM test_table_2);
  * Example: (SELECT id FROM test_table) UNION DISTINCT (SELECT id FROM test_table_2);
@ -41,7 +42,8 @@ using UnionNodePtr = std::shared_ptr<UnionNode>;
 class UnionNode final : public IQueryTreeNode
 {
 public:
-    explicit UnionNode();
+    /// Construct union node with normalized union mode
+    explicit UnionNode(SelectUnionMode union_mode_);

    /// Returns true if union node is subquery, false otherwise
    bool isSubquery() const
@ -85,25 +87,6 @@ public:
        return union_mode;
    }

-    /// Set union mode value
-    void setUnionMode(SelectUnionMode union_mode_value)
-    {
-        union_mode = union_mode_value;
-    }
-
-    /// Get union modes
-    const SelectUnionModes & getUnionModes() const
-    {
-        return union_modes;
-    }
-
-    /// Set union modes value
-    void setUnionModes(const SelectUnionModes & union_modes_value)
-    {
-        union_modes = union_modes_value;
-        union_modes_set = SelectUnionModesSet(union_modes.begin(), union_modes.end());
-    }
-
    /// Get union node queries
    const ListNode & getQueries() const
    {
@ -128,24 +111,6 @@ public:
        return children[queries_child_index];
    }

-    /// Return true if union node has table expression modifiers, false otherwise
-    bool hasTableExpressionModifiers() const
-    {
-        return table_expression_modifiers.has_value();
-    }
-
-    /// Get table expression modifiers
-    const std::optional<TableExpressionModifiers> & getTableExpressionModifiers() const
-    {
-        return table_expression_modifiers;
-    }
-
-    /// Set table expression modifiers
-    void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
-    {
-        table_expression_modifiers = std::move(table_expression_modifiers_value);
-    }
-
    /// Compute union node projection columns
    NamesAndTypes computeProjectionColumns() const;

@ -189,10 +154,7 @@ private:
    bool is_cte = false;
    std::string cte_name;
    SelectUnionMode union_mode;
-    SelectUnionModes union_modes;
-    SelectUnionModesSet union_modes_set;
    ConstantValuePtr constant_value;
-    std::optional<TableExpressionModifiers> table_expression_modifiers;

    static constexpr size_t queries_child_index = 0;
    static constexpr size_t children_size = queries_child_index + 1;
--- a/src/Analyzer/Utils.cpp
+++ b/src/Analyzer/Utils.cpp
@ -98,11 +98,6 @@ static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expre

    if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)
    {
-        if (auto * query_node = table_expression_node->as<QueryNode>())
-            table_expression_modifiers = query_node->getTableExpressionModifiers();
-        else if (auto * union_node = table_expression_node->as<UnionNode>())
-            table_expression_modifiers = union_node->getTableExpressionModifiers();
-
        result_table_expression->subquery = result_table_expression->children.back();
    }
    else if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::IDENTIFIER)
--- a/src/Columns/ColumnArray.h
+++ b/src/Columns/ColumnArray.h
@ -176,6 +176,9 @@ public:

    void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;

+    void finalize() override { data->finalize(); }
+    bool isFinalized() const override { return data->isFinalized(); }
+
    bool isCollationSupported() const override { return getData().isCollationSupported(); }

    size_t getNumberOfDimensions() const;
--- a/src/Columns/ColumnMap.h
+++ b/src/Columns/ColumnMap.h
@ -93,6 +93,8 @@ public:
    bool structureEquals(const IColumn & rhs) const override;
    double getRatioOfDefaultRows(double sample_ratio) const override;
    void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
+    void finalize() override { nested->finalize(); }
+    bool isFinalized() const override { return nested->isFinalized(); }

    const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }
    ColumnArray & getNestedColumn() { return assert_cast<ColumnArray &>(*nested); }
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@ -732,8 +732,8 @@ void ColumnObject::get(size_t n, Field & res) const
 {
    assert(n < size());
    res = Object();
-
    auto & object = res.get<Object &>();
+
    for (const auto & entry : subcolumns)
    {
        auto it = object.try_emplace(entry->path.getPath()).first;
@ -744,7 +744,6 @@ void ColumnObject::get(size_t n, Field & res) const
 void ColumnObject::insertFrom(const IColumn & src, size_t n)
 {
    insert(src[n]);
-    finalize();
 }

 void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -792,9 +791,8 @@ MutableColumnPtr ColumnObject::applyForSubcolumns(Func && func) const
 {
    if (!isFinalized())
    {
-        auto finalized = IColumn::mutate(getPtr());
+        auto finalized = cloneFinalized();
        auto & finalized_object = assert_cast<ColumnObject &>(*finalized);
-        finalized_object.finalize();
        return finalized_object.applyForSubcolumns(std::forward<Func>(func));
    }

--- a/src/Columns/ColumnObject.h
+++ b/src/Columns/ColumnObject.h
@ -198,10 +198,6 @@ public:
    Subcolumns & getSubcolumns() { return subcolumns; }
    PathsInData getKeys() const;

-    /// Finalizes all subcolumns.
-    void finalize();
-    bool isFinalized() const;
-
    /// Part of interface

    const char * getFamilyName() const override { return "Object"; }
@ -219,12 +215,17 @@ public:
    void popBack(size_t length) override;
    Field operator[](size_t n) const override;
    void get(size_t n, Field & res) const override;
+
    ColumnPtr permute(const Permutation & perm, size_t limit) const override;
    ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
    ColumnPtr index(const IColumn & indexes, size_t limit) const override;
    ColumnPtr replicate(const Offsets & offsets) const override;
    MutableColumnPtr cloneResized(size_t new_size) const override;

+    /// Finalizes all subcolumns.
+    void finalize() override;
+    bool isFinalized() const override;
+
    /// Order of rows in ColumnObject is undefined.
    void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
    void compareColumn(const IColumn & rhs, size_t rhs_row_num,
@ -264,9 +265,7 @@ private:
    template <typename Func>
    MutableColumnPtr applyForSubcolumns(Func && func) const;

-    /// For given subcolumn return subcolumn from the same Nested type.
    /// It's used to get shared sized of Nested to insert correct default values.
    const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const;
 };
-
 }
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@ -570,4 +570,15 @@ void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, siz
    return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
 }

+void ColumnTuple::finalize()
+{
+    for (auto & column : columns)
+        column->finalize();
+}
+
+bool ColumnTuple::isFinalized() const
+{
+    return std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column->isFinalized(); });
+}
+
 }
--- a/src/Columns/ColumnTuple.h
+++ b/src/Columns/ColumnTuple.h
@ -103,6 +103,8 @@ public:
    ColumnPtr compress() const override;
    double getRatioOfDefaultRows(double sample_ratio) const override;
    void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
+    void finalize() override;
+    bool isFinalized() const override;

    size_t tupleSize() const { return columns.size(); }

--- a/src/Columns/IColumn.h
+++ b/src/Columns/IColumn.h
@ -85,8 +85,8 @@ public:
    [[nodiscard]] virtual MutablePtr cloneEmpty() const { return cloneResized(0); }

    /// Creates column with the same type and specified size.
-    /// If size is less current size, then data is cut.
-    /// If size is greater, than default values are appended.
+    /// If size is less than current size, then data is cut.
+    /// If size is greater, then default values are appended.
    [[nodiscard]] virtual MutablePtr cloneResized(size_t /*size*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cloneResized() column {}", getName()); }

    /// Returns number of values in column.
@ -453,6 +453,16 @@ public:
        return getPtr();
    }

+    /// Some columns may require finalization before other operations can be used on them.
+    virtual void finalize() {}
+    virtual bool isFinalized() const { return true; }
+
+    MutablePtr cloneFinalized() const
+    {
+        auto finalized = IColumn::mutate(getPtr());
+        finalized->finalize();
+        return finalized;
+    }

    [[nodiscard]] static MutablePtr mutate(Ptr ptr)
    {
--- a/src/Common/CaresPTRResolver.cpp
+++ b/src/Common/CaresPTRResolver.cpp
@ -2,6 +2,7 @@
 #include <arpa/inet.h>
 #include <sys/select.h>
 #include <Common/Exception.h>
+#include <Common/logger_useful.h>
 #include "ares.h"
 #include "netdb.h"

@ -40,6 +41,8 @@ namespace DB
        }
    }

+    std::mutex CaresPTRResolver::mutex;
+
    CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr)
    {
        /*
@ -73,6 +76,8 @@ namespace DB

    std::unordered_set<std::string> CaresPTRResolver::resolve(const std::string & ip)
    {
+        std::lock_guard guard(mutex);
+
        std::unordered_set<std::string> ptr_records;

        resolve(ip, ptr_records);
@ -83,6 +88,8 @@ namespace DB

    std::unordered_set<std::string> CaresPTRResolver::resolve_v6(const std::string & ip)
    {
+        std::lock_guard guard(mutex);
+
        std::unordered_set<std::string> ptr_records;

        resolve_v6(ip, ptr_records);
@ -110,23 +117,83 @@ namespace DB

    void CaresPTRResolver::wait()
    {
-        timeval * tvp, tv;
-        fd_set read_fds;
-        fd_set write_fds;
-        int nfds;
+        int sockets[ARES_GETSOCK_MAXNUM];
+        pollfd pollfd[ARES_GETSOCK_MAXNUM];

-        for (;;)
+        while (true)
        {
-            FD_ZERO(&read_fds);
-            FD_ZERO(&write_fds);
-            nfds = ares_fds(channel, &read_fds,&write_fds);
-            if (nfds == 0)
+            auto readable_sockets = get_readable_sockets(sockets, pollfd);
+            auto timeout = calculate_timeout();
+
+            int number_of_fds_ready = 0;
+            if (!readable_sockets.empty())
+            {
+                number_of_fds_ready = poll(readable_sockets.data(), static_cast<nfds_t>(readable_sockets.size()), static_cast<int>(timeout));
+            }
+
+            if (number_of_fds_ready > 0)
+            {
+                process_readable_sockets(readable_sockets);
+            }
+            else
+            {
+                process_possible_timeout();
+                break;
+            }
+        }
+    }
+
+    std::span<pollfd> CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd)
+    {
+        int sockets_bitmask = ares_getsock(channel, sockets, ARES_GETSOCK_MAXNUM);
+
+        int number_of_sockets_to_poll = 0;
+
+        for (int i = 0; i < ARES_GETSOCK_MAXNUM; i++, number_of_sockets_to_poll++)
+        {
+            pollfd[i].events = 0;
+            pollfd[i].revents = 0;
+
+            if (ARES_GETSOCK_READABLE(sockets_bitmask, i))
+            {
+                pollfd[i].fd = sockets[i];
+                pollfd[i].events = POLLIN;
+            }
+            else
            {
                break;
            }
-            tvp = ares_timeout(channel, nullptr, &tv);
-            select(nfds, &read_fds, &write_fds, nullptr, tvp);
-            ares_process(channel, &read_fds, &write_fds);
+        }
+
+        return std::span<struct pollfd>(pollfd, number_of_sockets_to_poll);
+    }
+
+    int64_t CaresPTRResolver::calculate_timeout()
+    {
+        timeval tv;
+        if (auto * tvp = ares_timeout(channel, nullptr, &tv))
+        {
+            auto timeout = tvp->tv_sec * 1000 + tvp->tv_usec / 1000;
+
+            return timeout;
+        }
+
+        return 0;
+    }
+
+    void CaresPTRResolver::process_possible_timeout()
+    {
+        /* Call ares_process_fd() unconditionally here, even if we simply timed out
+        above, as otherwise the ares name resolve won't time out! */
+        ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
+    }
+
+    void CaresPTRResolver::process_readable_sockets(std::span<pollfd> readable_sockets)
+    {
+        for (auto readable_socket : readable_sockets)
+        {
+            auto fd = readable_socket.revents & POLLIN ? readable_socket.fd : ARES_SOCKET_BAD;
+            ares_process_fd(channel, fd, ARES_SOCKET_BAD);
        }
    }
 }
--- a/src/Common/CaresPTRResolver.h
+++ b/src/Common/CaresPTRResolver.h
@ -1,5 +1,8 @@
 #pragma once

+#include <span>
+#include <poll.h>
+#include <mutex>
 #include "DNSPTRResolver.h"

 using ares_channel = struct ares_channeldata *;
@ -20,7 +23,6 @@ namespace DB
         * Allow only DNSPTRProvider to instantiate this class
         * */
        struct provider_token {};
-
    public:
        explicit CaresPTRResolver(provider_token);
        ~CaresPTRResolver() override;
@ -36,7 +38,17 @@ namespace DB

        void resolve_v6(const std::string & ip, std::unordered_set<std::string> & response);

+        std::span<pollfd> get_readable_sockets(int * sockets, pollfd * pollfd);
+
+        int64_t calculate_timeout();
+
+        void process_possible_timeout();
+
+        void process_readable_sockets(std::span<pollfd> readable_sockets);
+
        ares_channel channel;
+
+        static std::mutex mutex;
    };
 }

--- a/src/Common/DNSPTRResolverProvider.cpp
+++ b/src/Common/DNSPTRResolverProvider.cpp
@ -5,8 +5,10 @@ namespace DB
 {
    std::shared_ptr<DNSPTRResolver> DNSPTRResolverProvider::get()
    {
-        return std::make_shared<CaresPTRResolver>(
+        static auto resolver = std::make_shared<CaresPTRResolver>(
            CaresPTRResolver::provider_token {}
        );
+
+        return resolver;
    }
 }
--- a/src/Common/JSONParsers/DummyJSONParser.h
+++ b/src/Common/JSONParsers/DummyJSONParser.h
@ -3,6 +3,7 @@
 #include <Common/Exception.h>
 #include <base/types.h>
 #include <base/defines.h>
+#include "ElementTypes.h"


 namespace DB
@ -25,6 +26,7 @@ struct DummyJSONParser
    {
    public:
        Element() = default;
+        static ElementType type() { return ElementType::NULL_VALUE; }
        static bool isInt64() { return false; }
        static bool isUInt64() { return false; }
        static bool isDouble() { return false; }
--- a/src/Common/JSONParsers/ElementTypes.h
+++ b/src/Common/JSONParsers/ElementTypes.h
@ -0,0 +1,17 @@
+#pragma once
+
+namespace DB
+{
+// Enum values match simdjson's for fast conversion
+enum class ElementType
+{
+    ARRAY = '[',
+    OBJECT = '{',
+    INT64 = 'l',
+    UINT64 = 'u',
+    DOUBLE = 'd',
+    STRING = '"',
+    BOOL = 't',
+    NULL_VALUE = 'n'
+};
+}
--- a/src/Common/JSONParsers/RapidJSONParser.h
+++ b/src/Common/JSONParsers/RapidJSONParser.h
@ -6,7 +6,7 @@
 #    include <base/types.h>
 #    include <base/defines.h>
 #    include <rapidjson/document.h>
-
+#    include "ElementTypes.h"

 namespace DB
 {
@ -26,6 +26,20 @@ struct RapidJSONParser
        ALWAYS_INLINE Element() = default;
        ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT

+        ALWAYS_INLINE ElementType type() const
+        {
+            switch (ptr->GetType())
+            {
+                case rapidjson::kNumberType: return ptr->IsDouble() ? ElementType::DOUBLE : (ptr->IsUint64() ? ElementType::UINT64 : ElementType::INT64);
+                case rapidjson::kStringType: return ElementType::STRING;
+                case rapidjson::kArrayType: return ElementType::ARRAY;
+                case rapidjson::kObjectType: return ElementType::OBJECT;
+                case rapidjson::kTrueType: return ElementType::BOOL;
+                case rapidjson::kFalseType: return ElementType::BOOL;
+                case rapidjson::kNullType: return ElementType::NULL_VALUE;
+            }
+        }
+
        ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); }
        ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); }
        ALWAYS_INLINE bool isDouble() const { return ptr->IsDouble(); }
--- a/src/Common/JSONParsers/SimdJSONParser.h
+++ b/src/Common/JSONParsers/SimdJSONParser.h
@ -7,7 +7,7 @@
 #    include <Common/Exception.h>
 #    include <base/defines.h>
 #    include <simdjson.h>
-
+#    include "ElementTypes.h"

 namespace DB
 {
@ -31,6 +31,21 @@ struct SimdJSONParser
        ALWAYS_INLINE Element() {} /// NOLINT
        ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} /// NOLINT

+        ALWAYS_INLINE ElementType type() const
+        {
+            switch (element.type())
+            {
+                case simdjson::dom::element_type::INT64: return ElementType::INT64;
+                case simdjson::dom::element_type::UINT64: return ElementType::UINT64;
+                case simdjson::dom::element_type::DOUBLE: return ElementType::DOUBLE;
+                case simdjson::dom::element_type::STRING: return ElementType::STRING;
+                case simdjson::dom::element_type::ARRAY: return ElementType::ARRAY;
+                case simdjson::dom::element_type::OBJECT: return ElementType::OBJECT;
+                case simdjson::dom::element_type::BOOL: return ElementType::BOOL;
+                case simdjson::dom::element_type::NULL_VALUE: return ElementType::NULL_VALUE;
+            }
+        }
+
        ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
        ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
        ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@ -10,6 +10,7 @@
 #include <Common/ProfileEvents.h>
 #include <Common/thread_local_rng.h>
 #include <Common/OvercommitTracker.h>
+#include <Common/Stopwatch.h>
 #include <Common/logger_useful.h>

 #include "config.h"
@ -86,6 +87,8 @@ inline std::string_view toDescription(OvercommitResult result)
 namespace ProfileEvents
 {
    extern const Event QueryMemoryLimitExceeded;
+    extern const Event MemoryAllocatorPurge;
+    extern const Event MemoryAllocatorPurgeTimeMicroseconds;
 }

 using namespace std::chrono_literals;
@ -229,7 +232,10 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
        {
            if (free_memory_in_allocator_arenas.exchange(-current_free_memory_in_allocator_arenas) > 0)
            {
+                Stopwatch watch;
                mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0);
+                ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge);
+                ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds());
            }
        }

@ -432,7 +438,7 @@ void MemoryTracker::reset()

 void MemoryTracker::setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_)
 {
-    Int64 new_amount = rss_; // - free_memory_in_allocator_arenas_;
+    Int64 new_amount = rss_;
    total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed);
    free_memory_in_allocator_arenas.store(free_memory_in_allocator_arenas_, std::memory_order_relaxed);

--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -229,6 +229,8 @@ The server successfully detected this situation and will download merged part fr
    M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user space. This include time CPU pipeline was stalled due to cache misses, branch mispredictions, hyper-threading, etc.") \
    M(SystemTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in OS kernel space. This include time CPU pipeline was stalled due to cache misses, branch mispredictions, hyper-threading, etc.") \
    M(MemoryOvercommitWaitTimeMicroseconds, "Total time spent in waiting for memory to be freed in OvercommitTracker.") \
+    M(MemoryAllocatorPurge, "Total number of times memory allocator purge was requested") \
+    M(MemoryAllocatorPurgeTimeMicroseconds, "Total time spent for memory allocator purge") \
    M(SoftPageFaults, "The number of soft page faults in query execution threads. Soft page fault usually means a miss in the memory allocator cache which required a new memory mapping from the OS and subsequent allocation of a page of physical memory.") \
    M(HardPageFaults, "The number of hard page faults in query execution threads. High values indicate either that you forgot to turn off swap on your server, or eviction of memory pages of the ClickHouse binary during very high memory pressure, or successful usage of the 'mmap' read method for the tables data.") \
    \
--- a/src/Common/Throttler.cpp
+++ b/src/Common/Throttler.cpp
@ -3,7 +3,6 @@
 #include <Common/Exception.h>
 #include <Common/Stopwatch.h>
 #include <IO/WriteHelpers.h>
-#include <cmath>

 namespace ProfileEvents
 {
@ -21,63 +20,56 @@ namespace ErrorCodes
 /// Just 10^9.
 static constexpr auto NS = 1000000000UL;

-/// Tracking window. Actually the size is not really important. We just want to avoid
-/// throttles when there are no actions for a long period time.
-static const double window_ns = 1ULL * NS;
+static const size_t default_burst_seconds = 1;
+
+Throttler::Throttler(size_t max_speed_, const std::shared_ptr<Throttler> & parent_)
+    : max_speed(max_speed_)
+    , max_burst(max_speed_ * default_burst_seconds)
+    , limit_exceeded_exception_message("")
+    , tokens(max_burst)
+    , parent(parent_)
+{}
+
+Throttler::Throttler(size_t max_speed_, size_t limit_, const char * limit_exceeded_exception_message_,
+            const std::shared_ptr<Throttler> & parent_)
+    : max_speed(max_speed_)
+    , max_burst(max_speed_ * default_burst_seconds)
+    , limit(limit_)
+    , limit_exceeded_exception_message(limit_exceeded_exception_message_)
+    , tokens(max_burst)
+    , parent(parent_)
+{}

 void Throttler::add(size_t amount)
 {
-    size_t new_count;
-    /// This outer variable is always equal to smoothed_speed.
-    /// We use to avoid race condition.
-    double current_speed = 0;
-
+    // Values obtained under lock to be checked after release
+    size_t count_value;
+    double tokens_value;
    {
        std::lock_guard lock(mutex);
-
        auto now = clock_gettime_ns_adjusted(prev_ns);
-        /// If prev_ns is equal to zero (first `add` call) we known nothing about speed
-        /// and don't track anything.
-        if (max_speed && prev_ns != 0)
+        if (max_speed)
        {
-            /// Time spent to process the amount of bytes
-            double time_spent = now - prev_ns;
-
-            /// The speed in bytes per second is equal to amount / time_spent in seconds
-            auto new_speed = amount / (time_spent / NS);
-
-            /// We want to make old values of speed less important for our smoothed value
-            /// so we decay it's value with coef.
-            auto decay_coeff = std::pow(0.5, time_spent / window_ns);
-
-            /// Weighted average between previous and new speed
-            smoothed_speed = smoothed_speed * decay_coeff + (1 - decay_coeff) * new_speed;
-            current_speed = smoothed_speed;
+            double delta_seconds = prev_ns ? static_cast<double>(now - prev_ns) / NS : 0;
+            tokens = std::min<double>(tokens + max_speed * delta_seconds - amount, max_burst);
        }
-
        count += amount;
-        new_count = count;
+        count_value = count;
+        tokens_value = tokens;
        prev_ns = now;
    }

-    if (limit && new_count > limit)
+    if (limit && count_value > limit)
        throw Exception(limit_exceeded_exception_message + std::string(" Maximum: ") + toString(limit), ErrorCodes::LIMIT_EXCEEDED);

-    if (max_speed && current_speed > max_speed)
+    /// Throttling: if the token balance is negative, sleep for the time needed to refill it at max_speed
+    if (max_speed && tokens_value < 0)
    {
-        /// If we was too fast then we have to sleep until our smoothed speed became <= max_speed
-        int64_t sleep_time = static_cast<int64_t>(-window_ns * std::log2(max_speed / current_speed));
-
-        if (sleep_time > 0)
-        {
-            accumulated_sleep += sleep_time;
-
-            sleepForNanoseconds(sleep_time);
-
-            accumulated_sleep -= sleep_time;
-
-            ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_time / 1000UL);
-        }
+        int64_t sleep_time = static_cast<int64_t>(-tokens_value / max_speed * NS);
+        accumulated_sleep += sleep_time;
+        sleepForNanoseconds(sleep_time);
+        accumulated_sleep -= sleep_time;
+        ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_time / 1000UL);
    }

    if (parent)
@ -89,9 +81,9 @@ void Throttler::reset()
    std::lock_guard lock(mutex);

    count = 0;
-    accumulated_sleep = 0;
-    smoothed_speed = 0;
+    tokens = max_burst;
    prev_ns = 0;
+    // NOTE: do not zero `accumulated_sleep` to avoid races
 }

 bool Throttler::isThrottling() const
--- a/Show More
+++ b/Show More