Merge branch 'master' into fix-optimize-aggregate-in-order-with-aliases

Nikolai Kochetov 2023-03-10 21:17:02 +01:00 committed by GitHub
commit 91a2f39c0c
522 changed files with 10828 additions and 3534 deletions


@ -79,7 +79,7 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -98,12 +98,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -421,7 +452,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-push --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@ -741,7 +773,8 @@ jobs:
- FunctionalStatefulTestDebug
- StressTestTsan
- IntegrationTestsRelease
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code


@ -35,7 +35,6 @@ jobs:
fetch-depth: 0
- name: Cherry pick
run: |
sudo pip install GitPython
cd "$GITHUB_WORKSPACE/tests/ci"
python3 cherry_pick.py
- name: Cleanup


@ -110,7 +110,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -129,12 +129,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -829,7 +860,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head
python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@ -3124,7 +3156,8 @@ jobs:
- PerformanceComparisonX86-1
- PerformanceComparisonX86-2
- PerformanceComparisonX86-3
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
- ASTFuzzerTestDebug
- ASTFuzzerTestAsan
- ASTFuzzerTestTsan


@ -37,7 +37,6 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
python3 run_check.py
PythonUnitTests:
needs: CheckLabels
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@ -174,7 +173,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -193,12 +192,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -886,7 +916,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-push --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@ -4792,7 +4823,8 @@ jobs:
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsReleaseClang
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
- IntegrationTestsFlakyCheck
- SQLancerTestRelease
- SQLancerTestDebug


@ -7,15 +7,28 @@ on: # yamllint disable-line rule:truthy
release:
types:
- published
workflow_dispatch:
inputs:
tag:
description: 'Release tag'
required: true
type: string
jobs:
ReleasePublish:
runs-on: [self-hosted, style-checker]
steps:
- name: Set tag from input
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
- name: Set tag from REF
if: github.event_name == 'release'
run: |
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Deploy packages and assets
run: |
GITHUB_TAG="${GITHUB_REF#refs/tags/}"
curl --silent --data '' \
curl --silent --data '' --no-buffer \
'${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true'
############################################################################################
##################################### Docker images #######################################
@ -23,16 +36,26 @@ jobs:
DockerServerImages:
runs-on: [self-hosted, style-checker]
steps:
- name: Set tag from input
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
- name: Set tag from REF
if: github.event_name == 'release'
run: |
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # otherwise we will have no version info
ref: ${{ env.GITHUB_TAG }}
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type auto --version "${{ github.ref }}"
python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \
--image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()


@ -71,7 +71,7 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -90,12 +90,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -494,7 +525,8 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type head --no-push --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
@ -1947,7 +1979,8 @@ jobs:
- IntegrationTestsTsan1
- IntegrationTestsTsan2
- IntegrationTestsTsan3
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code


@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.


@ -4,7 +4,6 @@
#include <type_traits>
#include <utility>
#include "defines.h"
#include "TypePair.h"
/// General-purpose typelist. Easy on compilation times as it does not use recursion.
template <typename ...Args>
@ -28,7 +27,7 @@ namespace TypeListUtils /// In some contexts it's more handy to use functions in
constexpr Root<Args...> changeRoot(TypeList<Args...>) { return {}; }
template <typename F, typename ...Args>
constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(Id<Args>{}), ...); }
constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(TypeList<Args>{}), ...); }
}
template <typename TypeListLeft, typename TypeListRight>
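
A minimal, self-contained sketch of how the updated forEach above is meant to be used; the TypeList here is reduced to what the example needs (the real header carries more utilities), and the C++20 templated lambda is only an illustration:

#include <iostream>
#include <utility>

// Reduced TypeList mirroring base/base/TypeList.h above.
template <typename ...Args>
struct TypeList {};

namespace TypeListUtils
{
    template <typename F, typename ...Args>
    constexpr void forEach(TypeList<Args...>, F && f) { (std::forward<F>(f)(TypeList<Args>{}), ...); }
}

int main()
{
    // After this change the callback receives TypeList<T> instead of Id<T> (from the removed TypePair.h),
    // so the element type can be recovered with a deduced template parameter.
    TypeListUtils::forEach(TypeList<int, double, char>{}, []<typename T>(TypeList<T>)
    {
        std::cout << sizeof(T) << '\n'; // typically prints 4, 8, 1
    });
}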


@ -1,4 +0,0 @@
#pragma once
template <typename T, typename V> struct TypePair {};
template <typename T> struct Id {};

base/base/hex.h Normal file (214 lines added)

@ -0,0 +1,214 @@
#pragma once
#include <cstring>
#include "types.h"
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef";
constexpr char hexDigitUppercase(unsigned char c)
{
return hex_digit_to_char_uppercase_table[c];
}
constexpr char hexDigitLowercase(unsigned char c)
{
return hex_digit_to_char_lowercase_table[c];
}
/// Maps 0..255 to 00..FF or 00..ff correspondingly
constexpr inline std::string_view hex_byte_to_char_uppercase_table = //
"000102030405060708090A0B0C0D0E0F"
"101112131415161718191A1B1C1D1E1F"
"202122232425262728292A2B2C2D2E2F"
"303132333435363738393A3B3C3D3E3F"
"404142434445464748494A4B4C4D4E4F"
"505152535455565758595A5B5C5D5E5F"
"606162636465666768696A6B6C6D6E6F"
"707172737475767778797A7B7C7D7E7F"
"808182838485868788898A8B8C8D8E8F"
"909192939495969798999A9B9C9D9E9F"
"A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
"B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
"C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
"D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
"E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
constexpr inline std::string_view hex_byte_to_char_lowercase_table = //
"000102030405060708090a0b0c0d0e0f"
"101112131415161718191a1b1c1d1e1f"
"202122232425262728292a2b2c2d2e2f"
"303132333435363738393a3b3c3d3e3f"
"404142434445464748494a4b4c4d4e4f"
"505152535455565758595a5b5c5d5e5f"
"606162636465666768696a6b6c6d6e6f"
"707172737475767778797a7b7c7d7e7f"
"808182838485868788898a8b8c8d8e8f"
"909192939495969798999a9b9c9d9e9f"
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
inline void writeHexByteUppercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
}
inline void writeHexByteLowercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
}
constexpr inline std::string_view bin_byte_to_char_table = //
"0000000000000001000000100000001100000100000001010000011000000111"
"0000100000001001000010100000101100001100000011010000111000001111"
"0001000000010001000100100001001100010100000101010001011000010111"
"0001100000011001000110100001101100011100000111010001111000011111"
"0010000000100001001000100010001100100100001001010010011000100111"
"0010100000101001001010100010101100101100001011010010111000101111"
"0011000000110001001100100011001100110100001101010011011000110111"
"0011100000111001001110100011101100111100001111010011111000111111"
"0100000001000001010000100100001101000100010001010100011001000111"
"0100100001001001010010100100101101001100010011010100111001001111"
"0101000001010001010100100101001101010100010101010101011001010111"
"0101100001011001010110100101101101011100010111010101111001011111"
"0110000001100001011000100110001101100100011001010110011001100111"
"0110100001101001011010100110101101101100011011010110111001101111"
"0111000001110001011100100111001101110100011101010111011001110111"
"0111100001111001011110100111101101111100011111010111111001111111"
"1000000010000001100000101000001110000100100001011000011010000111"
"1000100010001001100010101000101110001100100011011000111010001111"
"1001000010010001100100101001001110010100100101011001011010010111"
"1001100010011001100110101001101110011100100111011001111010011111"
"1010000010100001101000101010001110100100101001011010011010100111"
"1010100010101001101010101010101110101100101011011010111010101111"
"1011000010110001101100101011001110110100101101011011011010110111"
"1011100010111001101110101011101110111100101111011011111010111111"
"1100000011000001110000101100001111000100110001011100011011000111"
"1100100011001001110010101100101111001100110011011100111011001111"
"1101000011010001110100101101001111010100110101011101011011010111"
"1101100011011001110110101101101111011100110111011101111011011111"
"1110000011100001111000101110001111100100111001011110011011100111"
"1110100011101001111010101110101111101100111011011110111011101111"
"1111000011110001111100101111001111110100111101011111011011110111"
"1111100011111001111110101111101111111100111111011111111011111111";
inline void writeBinByte(UInt8 byte, void * out)
{
memcpy(out, &bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
}
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
template <typename TUInt>
inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table)
{
union
{
TUInt value;
UInt8 uint8[sizeof(TUInt)];
};
value = uint_;
for (size_t i = 0; i < sizeof(TUInt); ++i)
{
if constexpr (std::endian::native == std::endian::little)
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
else
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
}
}
template <typename TUInt>
inline void writeHexUIntUppercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
}
template <typename TUInt>
inline void writeHexUIntLowercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
}
template <typename TUInt>
std::string getHexUIntUppercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntUppercase(uint_, res.data());
return res;
}
template <typename TUInt>
std::string getHexUIntLowercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntLowercase(uint_, res.data());
return res;
}
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
constexpr inline std::string_view hex_char_to_digit_table
= {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
256};
constexpr UInt8 unhex(char c)
{
return hex_char_to_digit_table[static_cast<UInt8>(c)];
}
constexpr UInt8 unhex2(const char * data)
{
return static_cast<UInt8>(unhex(data[0])) * 0x10 + static_cast<UInt8>(unhex(data[1]));
}
constexpr UInt16 unhex4(const char * data)
{
return static_cast<UInt16>(unhex(data[0])) * 0x1000 + static_cast<UInt16>(unhex(data[1])) * 0x100
+ static_cast<UInt16>(unhex(data[2])) * 0x10 + static_cast<UInt16>(unhex(data[3]));
}
template <typename TUInt>
constexpr TUInt unhexUInt(const char * data)
{
TUInt res = 0;
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
{
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
{
res <<= 4;
res += unhex(*data);
}
}
else
{
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
{
res <<= 64;
res += unhexUInt<UInt64>(data);
}
}
return res;
}
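
A small usage sketch for the new hex helpers above, assuming the header is reachable as base/base/hex.h and that UInt64 comes from the types.h it includes:

#include <cassert>
#include <iostream>
#include <string>
#include "base/base/hex.h" // include path assumed

int main()
{
    // Round-trip a 64-bit value through the uppercase writer and the constexpr parser.
    UInt64 value = 0x0123456789ABCDEFULL;
    std::string hex = getHexUIntUppercase(value); // most significant byte first: "0123456789ABCDEF"
    assert(hex == "0123456789ABCDEF");
    assert(unhexUInt<UInt64>(hex.data()) == value);

    // Per-byte helpers: unhex2 parses exactly two hex digits.
    assert(unhex2("ff") == 0xFF);
    std::cout << hex << '\n';
}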

base/base/interpolate.h Normal file (13 lines added)

@ -0,0 +1,13 @@
#pragma once
#include <cassert>
#include <cmath>
/** Linear interpolation in logarithmic coordinates.
* Exponential interpolation is related to linear interpolation
* exactly in same way as geometric mean is related to arithmetic mean.
*/
constexpr double interpolateExponential(double min, double max, double ratio)
{
assert(min > 0 && ratio >= 0 && ratio <= 1);
return min * std::pow(max / min, ratio);
}
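
A quick illustration of what interpolateExponential computes; the function is copied from the header above so the check stands alone:

#include <cassert>
#include <cmath>

constexpr double interpolateExponential(double min, double max, double ratio)
{
    assert(min > 0 && ratio >= 0 && ratio <= 1);
    return min * std::pow(max / min, ratio);
}

int main()
{
    // Halfway in logarithmic coordinates is the geometric mean: between 1 and 100 that is 10.
    assert(std::fabs(interpolateExponential(1.0, 100.0, 0.5) - 10.0) < 1e-9);
    // ratio 0 and ratio 1 return the endpoints.
    assert(std::fabs(interpolateExponential(2.0, 32.0, 0.0) - 2.0) < 1e-9);
    assert(std::fabs(interpolateExponential(2.0, 32.0, 1.0) - 32.0) < 1e-9);
}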


@ -91,6 +91,9 @@ namespace MongoDB
Poco::Net::SocketAddress address() const;
/// Returns the address of the MongoDB server.
const std::string & uri() const;
/// Returns the uri on which the connection was made.
void connect(const std::string & hostAndPort);
/// Connects to the given MongoDB server.
///
@ -148,6 +151,7 @@ namespace MongoDB
private:
Poco::Net::SocketAddress _address;
Poco::Net::StreamSocket _socket;
std::string _uri;
};
@ -158,6 +162,10 @@ namespace MongoDB
{
return _address;
}
inline const std::string & Connection::uri() const
{
return _uri;
}
}


@ -145,14 +145,60 @@ void Connection::connect(const Poco::Net::StreamSocket& socket)
void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
{
Poco::URI theURI(uri);
std::vector<std::string> strAddresses;
std::string newURI;
if (uri.find(',') != std::string::npos)
{
size_t pos;
size_t head = 0;
if ((pos = uri.find("@")) != std::string::npos)
{
head = pos + 1;
}
else if ((pos = uri.find("://")) != std::string::npos)
{
head = pos + 3;
}
std::string tempstr;
std::string::const_iterator it = uri.begin();
it += head;
size_t tail = head;
for (;it != uri.end() && *it != '?' && *it != '/'; ++it)
{
tempstr += *it;
tail++;
}
it = tempstr.begin();
std::string token;
for (;it != tempstr.end(); ++it)
{
if (*it == ',')
{
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
strAddresses.push_back(newURI);
token = "";
}
else
{
token += *it;
}
}
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
strAddresses.push_back(newURI);
}
else
{
strAddresses.push_back(uri);
}
newURI = strAddresses.front();
Poco::URI theURI(newURI);
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
std::string userInfo = theURI.getUserInfo();
std::string host = theURI.getHost();
Poco::UInt16 port = theURI.getPort();
if (port == 0) port = 27017;
std::string databaseName = theURI.getPath();
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
if (databaseName.empty()) databaseName = "admin";
@ -161,6 +207,7 @@ void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
Poco::Timespan connectTimeout;
Poco::Timespan socketTimeout;
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
std::string readPreference="primary";
Poco::URI::QueryParameters params = theURI.getQueryParameters();
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
@ -181,16 +228,55 @@ void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
{
authMechanism = it->second;
}
else if (it->first == "readPreference")
{
readPreference= it->second;
}
}
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
for (std::vector<std::string>::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it)
{
newURI = *it;
theURI = Poco::URI(newURI);
std::string host = theURI.getHost();
Poco::UInt16 port = theURI.getPort();
if (port == 0) port = 27017;
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
_uri = newURI;
if (socketTimeout > 0)
{
_socket.setSendTimeout(socketTimeout);
_socket.setReceiveTimeout(socketTimeout);
}
if (strAddresses.size() > 1)
{
Poco::MongoDB::QueryRequest request("admin.$cmd");
request.setNumberToReturn(1);
request.selector().add("isMaster", 1);
Poco::MongoDB::ResponseMessage response;
sendRequest(request, response);
_uri = newURI;
if (!response.documents().empty())
{
Poco::MongoDB::Document::Ptr doc = response.documents()[0];
if (doc->get<bool>("ismaster") && readPreference == "primary")
{
break;
}
else if (!doc->get<bool>("ismaster") && readPreference == "secondary")
{
break;
}
else if (it + 1 == strAddresses.cend())
{
throw Poco::URISyntaxException(uri);
}
}
}
}
if (!userInfo.empty())
{
std::string username;
@ -204,6 +290,7 @@ void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
else username = userInfo;
Database database(databaseName);
if (!database.authenticate(*this, username, password, authMechanism))
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
}
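
The host-list handling added to Connection::connect above can be hard to follow inside the diff. Below is a standalone sketch of the same splitting idea (splitMongoURI is an illustrative name, not Poco API): a comma-separated mongodb:// URI is expanded into one single-host URI per address; the patch then tries each in turn and, when there are several hosts, additionally issues isMaster so that readPreference=primary/secondary is honoured.

#include <cassert>
#include <string>
#include <vector>

static std::vector<std::string> splitMongoURI(const std::string & uri)
{
    if (uri.find(',') == std::string::npos)
        return {uri};

    size_t head = 0;
    size_t pos;
    if ((pos = uri.find('@')) != std::string::npos)
        head = pos + 1;                               // hosts start after the credentials
    else if ((pos = uri.find("://")) != std::string::npos)
        head = pos + 3;                               // or right after the scheme

    size_t tail = uri.find_first_of("/?", head);      // host list ends at the path or query
    if (tail == std::string::npos)
        tail = uri.size();

    std::vector<std::string> result;
    std::string hosts = uri.substr(head, tail - head);
    size_t start = 0;
    while (true)
    {
        size_t comma = hosts.find(',', start);
        std::string host = hosts.substr(start, comma == std::string::npos ? std::string::npos : comma - start);
        result.push_back(uri.substr(0, head) + host + uri.substr(tail));
        if (comma == std::string::npos)
            break;
        start = comma + 1;
    }
    return result;
}

int main()
{
    auto uris = splitMongoURI("mongodb://user:pw@host1:27017,host2:27018/db?readPreference=secondary");
    assert(uris.size() == 2);
    assert(uris[0] == "mongodb://user:pw@host1:27017/db?readPreference=secondary");
    assert(uris[1] == "mongodb://user:pw@host2:27018/db?readPreference=secondary");
}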


@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
# ARROW_ORC + adapters/orc/CMakefiles
set(ORC_SRCS
"${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h"
"${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc"
"${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
"${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
"${ORC_SOURCE_SRC_DIR}/Reader.cc"
@ -129,13 +136,20 @@ set(ORC_SRCS
"${ORC_SOURCE_SRC_DIR}/MemoryPool.cc"
"${ORC_SOURCE_SRC_DIR}/RLE.cc"
"${ORC_SOURCE_SRC_DIR}/RLEv1.cc"
"${ORC_SOURCE_SRC_DIR}/RLEv2.cc"
"${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc"
"${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc"
"${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc"
"${ORC_SOURCE_SRC_DIR}/Statistics.cc"
"${ORC_SOURCE_SRC_DIR}/StripeStream.cc"
"${ORC_SOURCE_SRC_DIR}/Timezone.cc"
"${ORC_SOURCE_SRC_DIR}/TypeImpl.cc"
"${ORC_SOURCE_SRC_DIR}/Vector.cc"
"${ORC_SOURCE_SRC_DIR}/Writer.cc"
"${ORC_SOURCE_SRC_DIR}/Adaptor.cc"
"${ORC_SOURCE_SRC_DIR}/BloomFilter.cc"
"${ORC_SOURCE_SRC_DIR}/Murmur3.cc"
"${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc"
"${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc"
"${ORC_SOURCE_SRC_DIR}/io/InputStream.cc"
"${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc"
"${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_BROTLI)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})
add_library(_arrow ${ARROW_SRCS})
@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd
ch_contrib::brotli
)
target_link_libraries(_arrow PUBLIC _orc)


@ -6,6 +6,10 @@ if (MSVC)
target_compile_definitions (_farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1)
endif ()
if (ARCH_S390X)
add_compile_definitions(WORDS_BIGENDIAN)
endif ()
target_include_directories (_farmhash BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
add_library(ch_contrib::farmhash ALIAS _farmhash)

contrib/orc vendored (2 lines changed)

@ -1 +1 @@
Subproject commit f9a393ed2433a60034795284f82d093b348f2102
Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761


@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.2.1.2537"
ARG VERSION="23.2.4.12"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.2.1.2537"
ARG VERSION="23.2.4.12"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.2.1.2537"
ARG VERSION="23.2.4.12"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image


@ -60,6 +60,13 @@ install_packages previous_release_package_folder
export USE_S3_STORAGE_FOR_MERGE_TREE=1
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
# force_sync=false doesn't work correctly on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
configure
# But we still need default disk because some tables loaded only into it
@ -161,8 +168,10 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Authentication failed" \
-e "Cannot flush" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \
clickhouse-server.upgrade.log \
| grep -av -e "_repl_01111_.*Mapping for table with UUID" \
| zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv


@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02)
#### Performance Improvement
* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Bug Fix
* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)).
#### Build/Testing/Packaging Improvement
* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,40 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18)
#### Performance Improvement
* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).
#### Build/Testing/Packaging Improvement
* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,47 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce)
#### Performance Improvement
* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Improvement
* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### Build/Testing/Packaging Improvement
* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,30 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a)
#### Improvement
* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### Bug Fix
* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): Combined PREWHERE column accumulated from multiple PREWHERE in some cases didn't contain 0's from previous steps. The fix is to apply final filter if we know that it wasn't applied from more than 1 last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)).
#### Build/Testing/Packaging Improvement
* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` query with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.2.3.17-stable (dec18bf7281) FIXME as compared to v23.2.2.20-stable (f6c269c8df2)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#46907](https://github.com/ClickHouse/ClickHouse/issues/46907): Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#47091](https://github.com/ClickHouse/ClickHouse/issues/47091): Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#46885](https://github.com/ClickHouse/ClickHouse/issues/46885): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#47067](https://github.com/ClickHouse/ClickHouse/issues/47067): Fix typo in systemd service, which causes the systemd service start to fail. [#47051](https://github.com/ClickHouse/ClickHouse/pull/47051) ([Palash Goel](https://github.com/palash-goel)).
* Backported in [#47259](https://github.com/ClickHouse/ClickHouse/issues/47259): Fix concrete columns PREWHERE support. [#47154](https://github.com/ClickHouse/ClickHouse/pull/47154) ([Azat Khuzhin](https://github.com/azat)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* do flushUntrackedMemory when context switches [#47102](https://github.com/ClickHouse/ClickHouse/pull/47102) ([Sema Checherinda](https://github.com/CheSema)).
* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,20 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.2.4.12-stable (8fe866cb035) FIXME as compared to v23.2.3.17-stable (dec18bf7281)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#47277](https://github.com/ClickHouse/ClickHouse/issues/47277): Fix IPv4/IPv6 serialization/deserialization in binary formats that was broken in https://github.com/ClickHouse/ClickHouse/pull/43221. Closes [#46522](https://github.com/ClickHouse/ClickHouse/issues/46522). [#46616](https://github.com/ClickHouse/ClickHouse/pull/46616) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#47212](https://github.com/ClickHouse/ClickHouse/issues/47212): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#47363](https://github.com/ClickHouse/ClickHouse/issues/47363): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro
Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply a limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with a lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool is overloaded.
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU.
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backups to S3 perform a significant amount of IO operations, and to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings.
For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate a task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.

View File

@ -67,7 +67,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These
You can also clone the repository via https protocol:
git clone --recursive--shallow-submodules https://github.com/ClickHouse/ClickHouse.git
git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git
This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later, replacing the remote address of the repository with the `git remote` command.

View File

@ -19,8 +19,8 @@ Kafka lets you:
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
name1 [type1],
name2 [type2],
...
) ENGINE = Kafka()
SETTINGS
@ -113,6 +113,10 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
</details>
:::info
The Kafka table engine doesn't support columns with [default value](../../../sql-reference/statements/create/table.md#default_value). If you need columns with a default value, you can add them at the materialized view level (see below).
:::
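As a rough sketch of this workaround (broker address, topic, consumer group, and table names below are hypothetical), the defaults live in the destination table that a materialized view writes to:

```sql
-- Kafka source table: no DEFAULT columns allowed here
CREATE TABLE kafka_queue
(
    id UInt64,
    message String
) ENGINE = Kafka('localhost:9092', 'events', 'group1', 'JSONEachRow');

-- Destination table: this is where DEFAULT expressions can be declared
CREATE TABLE events
(
    id UInt64,
    message String,
    received_at DateTime DEFAULT now()
) ENGINE = MergeTree
ORDER BY id;

-- The materialized view streams rows from Kafka into the destination table
CREATE MATERIALIZED VIEW events_mv TO events AS
SELECT id, message
FROM kafka_queue;
```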
## Description {#description}
The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name.

View File

@ -450,10 +450,10 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY
Conditions in the `WHERE` clause contain calls of functions that operate on columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.
The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below.
Indexes of type `set` can be utilized by all functions. The other index types are supported as follows:
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
@ -469,6 +469,9 @@ The `set` index can be used with all functions. Function subsets for other index
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ |
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ |
| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ |
| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ |
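As a small illustration of the matrix above (table, column, and index parameters are made up), a `tokenbf_v1` skip index that token functions such as `hasToken` can use:

```sql
CREATE TABLE logs
(
    ts DateTime,
    message String,
    -- token bloom filter: 512 bytes, 3 hash functions, random seed 0
    INDEX message_tokens message TYPE tokenbf_v1(512, 3, 0) GRANULARITY 4
) ENGINE = MergeTree
ORDER BY ts;

-- hasToken can be served by the tokenbf_v1 index, per the matrix above
SELECT count()
FROM logs
WHERE hasToken(message, 'timeout');
```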

File diff suppressed because one or more lines are too long

View File

@ -1233,7 +1233,7 @@ Each row is formatted as a single document and each column is formatted as a sin
For output it uses the following correspondence between ClickHouse types and BSON types:
| ClickHouse type | BSON Type |
|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------|
|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
@ -1256,16 +1256,18 @@ For output it uses the following correspondence between ClickHouse types and BSO
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
For input it uses the following correspondence between BSON types and ClickHouse types:
| BSON Type | ClickHouse Type |
|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
@ -1275,7 +1277,7 @@ For input it uses the following correspondence between BSON types and ClickHouse
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert a BSON int32 value into a ClickHouse UInt8 column).
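For instance, a minimal sketch of emitting the IPv4/IPv6 mappings listed above in this format (the values are arbitrary):

```sql
SELECT
    toIPv4('192.168.0.1') AS ip4,   -- written as `\x10` int32
    toIPv6('2001:db8::1') AS ip6    -- written as `\x05` binary, `\x00` binary subtype
FORMAT BSONEachRow;
```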
@ -1609,7 +1611,7 @@ See also [Format Schema](#formatschema).
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|--------------------------------|-----------------------------------------------------------|--------------------------------|
|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------|
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
@ -1622,9 +1624,11 @@ The table below shows supported data types and how they match ClickHouse [data t
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(T, Void), union(Void, T)` |
| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
@ -1805,20 +1809,22 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
|---------------------------------------------|----------------------------------------------------------------------------------------------------|------------------------------|
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------|-------------------------------------------------|
| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(null, T)` |
| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
| `int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `int` |
| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `fixed(16)` |
\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
@ -1919,7 +1925,7 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|-----------------------------------------------|-----------------------------------------------------------------|------------------------------|
|----------------------------------------------------|-----------------------------------------------------------------|------------------------------|
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
@ -1940,6 +1946,8 @@ The table below shows supported data types and how they match ClickHouse [data t
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
@ -1973,6 +1981,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
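For example, a sketch (file name is arbitrary) of applying the compression setting above when writing Parquet via the `file` table function:

```sql
-- Write ten rows as zstd-compressed Parquet
INSERT INTO FUNCTION file('example.parquet', 'Parquet')
SELECT number AS id, toString(number) AS value
FROM numbers(10)
SETTINGS output_format_parquet_compression_method = 'zstd';

-- Read it back; the schema is inferred from the Parquet metadata
SELECT * FROM file('example.parquet', 'Parquet') LIMIT 3;
```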
## Arrow {#data-format-arrow}
@ -2007,6 +2016,8 @@ The table below shows supported data types and how they match ClickHouse [data t
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
@ -2041,6 +2052,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
- [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`.
## ArrowStream {#data-format-arrow-stream}
@ -2055,7 +2067,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|---------------------------------------|---------------------------------------------------------|--------------------------|
|---------------------------------------|---------------------------------------------------------------|--------------------------|
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
@ -2070,6 +2082,7 @@ The table below shows supported data types and how they match ClickHouse [data t
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
| `-` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `Int` |
Other types are not supported.
@ -2096,6 +2109,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
### Arrow format settings {#parquet-format-settings}
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
@ -2265,7 +2279,7 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data
### Data Types Matching {#data-types-matching-msgpack}
| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
|--------------------------------------------------------------------|-----------------------------------------------------------|------------------------------------|
|--------------------------------------------------------------------|-----------------------------------------------------------------|------------------------------------|
| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` |
| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` |
| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` |
@ -2278,6 +2292,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data
| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` |
| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` |
| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` |
| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` |
| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` |
Example:

View File

@ -117,7 +117,7 @@ clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM
4 47 Brayan ['movies','skydiving']
```
# Using structure from insertion table {#using-structure-from-insertion-table}
## Using structure from insertion table {#using-structure-from-insertion-table}
When table functions `file/s3/url/hdfs` are used to insert data into a table,
there is an option to use the structure from the insertion table instead of extracting it from the data.
@ -222,7 +222,7 @@ INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hob
In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used.
# Schema inference cache {#schema-inference-cache}
## Schema inference cache {#schema-inference-cache}
For most input formats schema inference reads some data to determine its structure and this process can take some time.
To prevent inferring the same schema every time ClickHouse reads the data from the same file, the inferred schema is cached, and when accessing the same file again, ClickHouse will use the schema from the cache.
@ -326,14 +326,14 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3'
└─────────┘
```
# Text formats {#text-formats}
## Text formats {#text-formats}
For text formats, ClickHouse reads the data row by row, extracts column values according to the format,
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference
is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000.
By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section).
## JSON formats {#json-formats}
### JSON formats {#json-formats}
In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them.
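For example, a quick way to see what gets inferred from a sample row (the row itself is made up):

```sql
DESC format(JSONEachRow, '{"id" : 1, "name" : "Alice", "active" : true}');
```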
@ -464,9 +464,9 @@ most likely this column contains only Nulls or empty Arrays/Maps.
...
```
### JSON settings {#json-settings}
#### JSON settings {#json-settings}
#### input_format_json_read_objects_as_strings
##### input_format_json_read_objects_as_strings
Enabling this setting allows reading nested JSON objects as strings.
This setting can be used to read nested JSON objects without using JSON object type.
@ -486,7 +486,7 @@ DESC format(JSONEachRow, $$
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_json_try_infer_numbers_from_strings
##### input_format_json_try_infer_numbers_from_strings
Enabling this setting allows inferring numbers from string values.
@ -507,7 +507,7 @@ DESC format(JSONEachRow, $$
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_json_read_numbers_as_strings
##### input_format_json_read_numbers_as_strings
Enabling this setting allows reading numeric values as strings.
@ -528,7 +528,7 @@ DESC format(JSONEachRow, $$
└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_json_read_bools_as_numbers
##### input_format_json_read_bools_as_numbers
Enabling this setting allows reading Bool values as numbers.
@ -549,7 +549,7 @@ DESC format(JSONEachRow, $$
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## CSV {#csv}
### CSV {#csv}
In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse
the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number,
@ -726,7 +726,7 @@ $$)
└──────────────┴───────────────┘
```
## TSV/TSKV {#tsv-tskv}
### TSV/TSKV {#tsv-tskv}
In TSV/TSKV formats ClickHouse extracts the column value from the row according to tabular delimiters and then parses the extracted value using
the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String.
@ -1019,7 +1019,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!')
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## CustomSeparated {#custom-separated}
### CustomSeparated {#custom-separated}
In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer
the data type for each value according to escaping rule.
@ -1080,7 +1080,7 @@ $$)
└────────┴───────────────┴────────────┘
```
## Template {#template}
### Template {#template}
In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the
data type for each value according to its escaping rule.
@ -1120,7 +1120,7 @@ $$)
└──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Regexp {#regexp}
### Regexp {#regexp}
Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to specified regular expression and then tries to infer
data type for each value according to the specified escaping rule.
@ -1142,9 +1142,9 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Settings for text formats {settings-for-text-formats}
### Settings for text formats {#settings-for-text-formats}
### input_format_max_rows_to_read_for_schema_inference
#### input_format_max_rows_to_read_for_schema_inference
This setting controls the maximum number of rows to be read during schema inference.
The more rows are read, the more time is spent on schema inference, but the greater the chance to
@ -1152,7 +1152,7 @@ correctly determine the types (especially when the data contains a lot of nulls)
Default value: `25000`.
### column_names_for_schema_inference
#### column_names_for_schema_inference
The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`.
@ -1169,7 +1169,7 @@ DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### schema_inference_hints
#### schema_inference_hints
The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'.
This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema.
@ -1189,7 +1189,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### schema_inference_make_columns_nullable
#### schema_inference_make_columns_nullable
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
@ -1232,7 +1232,7 @@ DESC format(JSONEachRow, $$
└─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### input_format_try_infer_integers
#### input_format_try_infer_integers
If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats.
If all numbers in the column from sample data are integers, the result type will be `Int64`; if at least one number is a float, the result type will be `Float64`.
@ -1289,7 +1289,7 @@ DESC format(JSONEachRow, $$
└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### input_format_try_infer_datetimes
#### input_format_try_infer_datetimes
If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,
@ -1337,7 +1337,7 @@ DESC format(JSONEachRow, $$
Note: Parsing datetimes during schema inference respects the setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format)
### input_format_try_infer_dates
#### input_format_try_infer_dates
If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats.
If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`,
@ -1383,14 +1383,14 @@ DESC format(JSONEachRow, $$
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
# Self describing formats {#self-describing-formats}
## Self describing formats {#self-describing-formats}
Self-describing formats contain information about the structure of the data in the data itself;
it can be a header with a description, a binary type tree, or some kind of table.
To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing
information about the types and converts it into a schema of the ClickHouse table.
## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
### Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data.
During schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types.
@ -1412,7 +1412,7 @@ $$)
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## JSON formats with metadata {#json-with-metadata}
### JSON formats with metadata {#json-with-metadata}
Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types.
In schema inference for such formats, ClickHouse reads this metadata.
@ -1465,7 +1465,7 @@ $$)
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Avro {#avro}
### Avro {#avro}
In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1485,7 +1485,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
Other Avro types are not supported.
## Parquet {#parquet}
### Parquet {#parquet}
In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1513,7 +1513,7 @@ In Parquet format ClickHouse reads its schema from the data and converts it to C
Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## Arrow {#arrow}
### Arrow {#arrow}
In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1541,7 +1541,7 @@ In Arrow format ClickHouse reads its schema from the data and converts it to Cli
Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## ORC {#orc}
### ORC {#orc}
In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1564,17 +1564,17 @@ In ORC format ClickHouse reads its schema from the data and converts it to Click
Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## Native {#native}
### Native {#native}
Native format is used inside ClickHouse and contains the schema in the data.
In schema inference, ClickHouse reads the schema from the data without any transformations.
# Formats with external schema {#formats-with-external-schema}
## Formats with external schema {#formats-with-external-schema}
Such formats require a schema describing the data in a separate file in a specific schema language.
To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema.
# Protobuf {#protobuf}
### Protobuf {#protobuf}
In schema inference for Protobuf format ClickHouse uses the following type matches:
@ -1592,7 +1592,7 @@ In schema inference for Protobuf format ClickHouse uses the following type match
| `repeated T` | [Array(T)](../sql-reference/data-types/array.md) |
| `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) |
# CapnProto {#capnproto}
### CapnProto {#capnproto}
In schema inference for CapnProto format ClickHouse uses the following type matches:
@ -1615,13 +1615,13 @@ In schema inference for CapnProto format ClickHouse uses the following type matc
| `struct` | [Tuple](../sql-reference/data-types/tuple.md) |
| `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) |
# Strong-typed binary formats {#strong-typed-binary-formats}
## Strong-typed binary formats {#strong-typed-binary-formats}
In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table.
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts
the type (and possibly name) for each value from the data and then converts these types to ClickHouse types.
## MsgPack {msgpack}
### MsgPack {#msgpack}
In MsgPack format there is no delimiter between rows. To use schema inference for this format, you should specify the number of columns in the table
using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches:
@ -1641,7 +1641,7 @@ using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## BSONEachRow {#bsoneachrow}
### BSONEachRow {#bsoneachrow}
In BSONEachRow each row of data is presented as a BSON document. In schema inference ClickHouse reads BSON documents one by one and extracts
values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches:
@ -1661,11 +1661,11 @@ values, names, and types from the data and then transforms these types to ClickH
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
# Formats with constant schema {#formats-with-constant-schema}
## Formats with constant schema {#formats-with-constant-schema}
Data in such formats always have the same schema.
## LineAsString {#line-as-string}
### LineAsString {#line-as-string}
In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`.
@ -1680,7 +1680,7 @@ DESC format(LineAsString, 'Hello\nworld!')
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## JSONAsString {#json-as-string}
### JSONAsString {#json-as-string}
In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`.
@ -1695,7 +1695,7 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}')
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## JSONAsObject {#json-as-object}
### JSONAsObject {#json-as-object}
In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. The inferred type for this format is always `Object('json')` and the column name is `json`.

View File

@ -967,6 +967,7 @@ The maximum number of jobs that can be scheduled on the Global Thread pool. Incr
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `10000`.
@ -976,6 +977,69 @@ Default value: `10000`.
<thread_pool_queue_size>12000</thread_pool_queue_size>
```
## max_io_thread_pool_size {#max-io-thread-pool-size}
ClickHouse uses threads from the IO Thread pool to do some IO operations (e.g. to interact with S3). `max_io_thread_pool_size` limits the maximum number of threads in the pool.
Possible values:
- Positive integer.
Default value: `100`.
## max_io_thread_pool_free_size {#max-io-thread-pool-free-size}
If the number of **idle** threads in the IO Thread pool exceeds `max_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
Possible values:
- Positive integer.
Default value: `0`.
## io_thread_pool_queue_size {#io-thread-pool-queue-size}
The maximum number of jobs that can be scheduled on the IO Thread pool.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `10000`.
## max_backups_io_thread_pool_size {#max-backups-io-thread-pool-size}
ClickHouse uses threads from the Backups IO Thread pool to do S3 backup IO operations. `max_backups_io_thread_pool_size` limits the maximum number of threads in the pool.
Possible values:
- Positive integer.
Default value: `1000`.
## max_backups_io_thread_pool_free_size {#max-backups-io-thread-pool-free-size}
If the number of **idle** threads in the Backups IO Thread pool exceeds `max_backups_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
Possible values:
- Positive integer.
- Zero.
Default value: `0`.
## backups_io_thread_pool_queue_size {#backups-io-thread-pool-queue-size}
The maximum number of jobs that can be scheduled on the Backups IO Thread pool. It is recommended to keep this queue unlimited due to the current S3 backup logic.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
## background_pool_size {#background_pool_size}
Sets the number of threads performing background merges and mutations for tables with MergeTree engines. For backward compatibility, this setting can also be applied at server startup from the `default` profile configuration. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. A smaller pool size utilizes less CPU and disk resources, but background processes advance more slowly, which might eventually impact query performance.
@ -1255,11 +1319,11 @@ Settings:
``` xml
<prometheus>
<endpoint>/metrics</endpoint>
<port>8001</port>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</prometheus>
</prometheus>
```
## query_log {#server_configuration_parameters-query-log}

View File

@ -1014,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column
Enabled by default.
### output_format_arrow_compression_method {#output_format_arrow_compression_method}
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
Default value: `none`.
## ORC format settings {#orc-format-settings}
### input_format_orc_import_nested {#input_format_orc_import_nested}
@ -1057,6 +1063,12 @@ Use ORC String type instead of Binary for String columns.
Disabled by default.
### output_format_orc_compression_method {#output_format_orc_compression_method}
Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed)
Default value: `none`.
## Parquet format settings {#parquet-format-settings}
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
@ -1112,6 +1124,12 @@ The version of Parquet format used in output format. Supported versions: `1.0`,
Default value: `2.latest`.
### output_format_parquet_compression_method {#output_format_parquet_compression_method}
Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed)
Default value: `snappy`.
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
@ -1474,7 +1492,7 @@ Default value: `65505`.
The name of table that will be used in the output INSERT statement.
Default value: `'table''`.
Default value: `table`.
### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names}
@ -1514,4 +1532,12 @@ Disabled by default.
The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit.
Default value: `1GiB`
Default value: `1GiB`.
## Native format settings {#native-format-settings}
### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
Allow types conversion in Native input format between columns from input data and requested columns.
Enabled by default.

View File

@ -1248,7 +1248,9 @@ Possible values:
Default value: 1.
:::warning
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas).
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key).
If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
:::
## totals_mode {#totals-mode}
@ -1273,16 +1275,47 @@ Default value: `1`.
**Additional Info**
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
This setting will produce different results depending on the other settings used with it.
:::warning
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
:::
### Parallel processing using `SAMPLE` key
A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
- The position of the sampling key in the partitioning key does not allow efficient range scans.
- Adding a sampling key to the table makes filtering by other columns less efficient.
- The sampling key is an expression that is expensive to calculate.
- The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.
:::warning
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
:::
### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
This setting is useful for any replicated table.
## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key}
An arbitrary integer expression that can be used to split work between replicas for a specific table.
The value can be any integer expression.
A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type).
Simple expressions using primary keys are preferred.
If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
Otherwise, it will behave the same as with the `SAMPLE` key: it will use multiple replicas of each shard.
## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type}
How to use `parallel_replicas_custom_key` expression for splitting work between replicas.
Possible values:
- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`.
- `range` — Split the entire value space of the expression into ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values.
Default value: `default`.
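A minimal sketch of combining these settings (the table and key column are hypothetical; the custom key is passed as an expression string):

```sql
SELECT count()
FROM distributed_events
SETTINGS
    max_parallel_replicas = 3,
    parallel_replicas_custom_key = 'user_id',
    parallel_replicas_custom_key_filter_type = 'default';
```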
## compile_expressions {#compile-expressions}

View File

@ -135,11 +135,13 @@ Example of configuration for versions later or equal to 22.8:
</cache>
</disks>
<policies>
<s3-cache>
<volumes>
<main>
<disk>cache</disk>
</main>
</volumes>
</s3-cache>
</policies>
</storage_configuration>
```
@ -159,11 +161,13 @@ Example of configuration for versions earlier than 22.8:
</s3>
</disks>
<policies>
<s3-cache>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3-cache>
</policies>
</storage_configuration>
```

View File

@ -14,10 +14,6 @@ Accepts data that represent tables and queries them using [ClickHouse SQL dialec
By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using the `--config-file` argument.
:::warning
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
:::
For temporary data, a unique temporary data directory is created by default.
## Usage {#usage}

View File

@ -11,15 +11,15 @@ sidebar_title: exponentialMovingAverage
**Syntax**
```sql
exponentialMovingAverage(x)(value, timestamp)
exponentialMovingAverage(x)(value, timeunit)
```
Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be.
Each `value` corresponds to a particular `timeunit`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value carries.
**Arguments**
- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `timestamp` — Timestamp. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not a timestamp (seconds) but an index of the time interval. It can be calculated using [intDiv](../../functions/arithmetic-functions/#intdiva-b).
**Parameters**
@ -148,3 +148,58 @@ Result:
│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│
└───────┴──────┴──────────────────────┴────────────────────────────────────────────┘
```
```sql
CREATE TABLE data
ENGINE = Memory AS
SELECT
10 AS value,
toDateTime('2020-01-01') + (3600 * number) AS time
FROM numbers_mt(10);
-- Calculate timeunit using intDiv
SELECT
value,
time,
exponentialMovingAverage(1)(value, intDiv(toUInt32(time), 3600)) OVER (ORDER BY time ASC) AS res,
intDiv(toUInt32(time), 3600) AS timeunit
FROM data
ORDER BY time ASC;
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
└───────┴─────────────────────┴─────────────┴──────────┘
-- Calculate timeunit using toRelativeHourNum
SELECT
value,
time,
exponentialMovingAverage(1)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC) AS res,
toRelativeHourNum(time) AS timeunit
FROM data
ORDER BY time ASC;
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
└───────┴─────────────────────┴─────────────┴──────────┘
```

View File

@ -1126,15 +1126,48 @@ Rounds the time to the half hour.
## toYYYYMM
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM).
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### Example
```sql
SELECT
toYYYYMM(now(), 'US/Eastern')
```
```response
┌─toYYYYMM(now(), 'US/Eastern')─┐
│ 202303 │
└───────────────────────────────┘
```
## toYYYYMMDD
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD).
Converts a date or date with time to a UInt32 number containing the year, month, and day number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### Example
```sql
SELECT
toYYYYMMDD(now(), 'US/Eastern')
```
```response
┌─toYYYYMMDD(now(), 'US/Eastern')─┐
│ 20230302 │
└─────────────────────────────────┘
```
## toYYYYMMDDhhmmss
Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss).
Converts a date or date with time to a UInt64 number containing the year, month, day, hour, minute, and second (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### Example
```sql
SELECT
toYYYYMMDDhhmmss(now(), 'US/Eastern')
```
```response
┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
│ 20230302112209 │
└───────────────────────────────────────┘
```
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters

View File

@ -280,12 +280,20 @@ SELECT
## toIPv4OrDefault(string)
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0.
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4).
## toIPv4OrNull(string)
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null.
## toIPv6OrDefault(string)
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6).
## toIPv6OrNull(string)
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null.
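The difference between the `OrDefault` and `OrNull` variants can be illustrated with a small query; the expected values in the comments follow from the descriptions above:
```sql
SELECT
    toIPv4OrDefault('not an address') AS v4_default, -- 0.0.0.0
    toIPv4OrNull('not an address')    AS v4_null,    -- NULL
    toIPv6OrDefault('not an address') AS v6_default, -- ::
    toIPv6OrNull('not an address')    AS v6_null;    -- NULL
```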
## toIPv6
Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.

View File

@ -330,7 +330,7 @@ repeat(s, n)
**Arguments**
- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md).
- `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md).
- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md).
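For example, a simple call with a positive count (shown here next to the argument it illustrates):
```sql
SELECT repeat('ab', 3) AS res;

-- ┌─res────┐
-- │ ababab │
-- └────────┘
```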
**Returned value**

View File

@ -233,8 +233,9 @@ If `some_predicate` is not selective enough, it will return large amount of data
### Distributed Subqueries and max_parallel_replicas
When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following:
When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
For example, the following:
```sql
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS max_parallel_replicas=3
@ -247,8 +248,12 @@ SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM
SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M
```
where M is between 1 and 3 depending on which replica the local query is executing on. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
where M is between 1 and 3 depending on which replica the local query is executing on.
Therefore adding the max_parallel_replicas setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
One workaround, if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.
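A sketch of that workaround applied to the query above:
```sql
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID GLOBAL IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS max_parallel_replicas=3
```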
If a table does not have a sampling key, the more flexible [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) setting can be used instead; it can produce different and often better behaviour.

View File

@ -110,25 +110,23 @@ If the type is not `Nullable` and if `NULL` is specified, it will be treated as
See also [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting.
## Default Values
## Default Values {#default_values}
The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`.
The column description can specify a default value expression in the form of `DEFAULT expr`, `MATERIALIZED expr`, or `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`.
Example: `URLDomain String DEFAULT domain(URL)`.
The expression `expr` is optional. If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns.
If an expression for the default value is not defined, the default values will be set to zeros for numbers, empty strings for strings, empty arrays for arrays, and `1970-01-01` for dates or zero unix timestamp for DateTime, NULL for Nullable.
The column type of a default value column can be omitted, in which case it is inferred from the type of `expr`. For example, the type of the column `EventDate DEFAULT toDate(EventTime)` will be `Date`.
If the default expression is defined, the column type is optional. If there isnt an explicitly defined type, the default expression type is used. Example: `EventDate DEFAULT toDate(EventTime)` the Date type will be used for the EventDate column.
If both a data type and a default value expression are specified, an implicit type casting function is inserted which converts the expression to the specified type. Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`.
If the data type and default expression are defined explicitly, this expression will be cast to the specified type using type casting functions. Example: `Hits UInt32 DEFAULT 0` means the same thing as `Hits UInt32 DEFAULT toUInt32(0)`.
Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable that all columns they can be calculated from have been passed.
A default value expression `expr` may reference arbitrary table columns and constants. ClickHouse checks that changes of the table structure do not introduce loops in the expression calculation. For INSERT, it checks that expressions are resolvable, that is, that all columns they can be calculated from have been passed.
### DEFAULT
`DEFAULT expr`
Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.
Normal default value. If the value of such a column is not specified in an INSERT query, it is computed from `expr`.
Example:
@ -154,9 +152,9 @@ SELECT * FROM test;
`MATERIALIZED expr`
Materialized expression. Such a column cant be specified for INSERT, because it is always calculated.
For an INSERT without a list of columns, these columns are not considered.
In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Materialized expression. Values of such columns are always calculated; they cannot be specified in INSERT queries.
Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with the setting `asterisk_include_materialized_columns`.
Example:
@ -192,8 +190,9 @@ SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
`EPHEMERAL [expr]`
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required.
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Ephemeral column. Columns of this type are not stored in the table and it is not possible to SELECT from them. The only purpose of ephemeral columns is to build default value expressions of other columns from them.
An insert without explicitly specified columns will skip columns of this type. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`.
Example:
@ -205,7 +204,7 @@ CREATE OR REPLACE TABLE test
hexed FixedString(4) DEFAULT unhex(unhexed)
)
ENGINE = MergeTree
ORDER BY id
ORDER BY id;
INSERT INTO test (id, unhexed) Values (1, '5a90b714');
@ -227,9 +226,9 @@ hex(hexed): 5A90B714
`ALIAS expr`
Synonym. Such a column isnt stored in the table at all.
Its values cant be inserted in a table, and it is not substituted when using an asterisk in a SELECT query.
It can be used in SELECTs if the alias is expanded during query parsing.
Calculated columns (synonym). Columns of this type are not stored in the table and it is not possible to INSERT values into them.
When SELECT queries explicitly reference columns of this type, the value is computed at query time from `expr`. By default, `SELECT *` excludes ALIAS columns. This behavior can be disabled with the setting `asterisk_include_alias_columns`.
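A minimal sketch of an ALIAS column (the table and column names are hypothetical):
```sql
CREATE OR REPLACE TABLE test_alias
(
    id UInt64,
    name String,
    name_upper String ALIAS upper(name) -- not stored; computed when referenced
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO test_alias (id, name) VALUES (1, 'alice');

SELECT name_upper FROM test_alias; -- computed from `name` at query time
```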
When using the ALTER query to add new columns, old data for these columns is not written. Instead, when reading old data that does not have values for the new columns, expressions are computed on the fly by default. However, if running the expressions requires different columns that are not indicated in the query, these columns will additionally be read, but only for the blocks of data that need it.
@ -576,7 +575,7 @@ SELECT * FROM base.t1;
You can add a comment to the table when creating it.
:::note
The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md).
:::

View File

@ -70,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table
Materialized views in ClickHouse use **column names** instead of column order during insertion into the destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice is to add aliases for every column when using materialized views.
Materialized views in ClickHouse are implemented more like insert triggers. If there's some aggregation in the view query, it's applied only to the batch of freshly inserted data. Any changes to existing data of the source table (like update, delete, drop partition, etc.) do not change the materialized view.
Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that had already been written will be preserved in the destination table, but all blocks after the error will not.
By default, if pushing to one of the views fails, the INSERT query fails too, and some blocks may not be written to the destination table. This can be changed using the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): with `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks are written to the destination table.
Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables.
:::
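A sketch of how the setting might be applied to a single insert (the table name is hypothetical):
```sql
-- Keep writing to the destination table even if pushing to one of its materialized views fails.
INSERT INTO events SETTINGS materialized_views_ignore_errors = 1 VALUES (1, 'payload');
```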
If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it.

View File

@ -6,21 +6,22 @@ sidebar_label: file
# file
Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
Creates a table from a file. This table function is similar to the [url](/docs/en/sql-reference/table-functions/url.md) and [hdfs](/docs/en/sql-reference/table-functions/hdfs.md) table functions.
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables.
The `file` function can be used in `SELECT` and `INSERT` queries on data in [File](/docs/en/engines/table-engines/special/file.md) tables.
**Syntax**
``` sql
file(path [,format] [,structure])
file(path [,format] [,structure] [,compression])
```
**Parameters**
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}`, where `N` and `M` are numbers and `'abc'`, `'def'` are strings.
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
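Following the syntax above, the compression type is passed as the fourth argument; a sketch with a hypothetical compressed file:
```sql
SELECT * FROM file('test.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gz') LIMIT 10;
```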
**Returned value**
@ -53,7 +54,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
└─────────┴─────────┴─────────┘
```
Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file:
Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:
``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
@ -143,4 +144,4 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

View File

@ -1,6 +1,6 @@
---
slug: /zh/guides/improving-query-performance/skipping-indexes
sidebar_label: Data Skipping Indexes
sidebar_label: 跳数索引
sidebar_position: 2
---

View File

@ -1,6 +1,6 @@
---
slug: /zh/guides/improving-query-performance/sparse-primary-indexes
sidebar_label: Sparse Primary Indexes
sidebar_label: 主键稀疏索引
sidebar_position: 20
---

View File

@ -14,7 +14,8 @@ User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=%p # %p is resolved to the systemd unit name
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p

View File

@ -1867,8 +1867,8 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
String query;
{
WriteBufferFromOwnString wb;
wb << "SELECT DISTINCT " << partition_name << " AS partition FROM"
<< " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC";
wb << "SELECT " << partition_name << " AS partition FROM "
<< getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC";
query = wb.str();
}

View File

@ -20,7 +20,7 @@
#include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>

View File

@ -128,6 +128,7 @@ if (BUILD_STANDALONE_KEEPER)
ch_contrib::lz4
ch_contrib::zstd
ch_contrib::cityhash
ch_contrib::jemalloc
common ch_contrib::double_conversion
ch_contrib::dragonbox_to_chars
pcg_random

View File

@ -41,6 +41,7 @@
#include <Common/TLDListsHolder.h>
#include <Common/Config/AbstractConfigurationComparison.h>
#include <Core/ServerUUID.h>
#include <IO/BackupsIOThreadPool.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/IOThreadPool.h>
@ -773,6 +774,11 @@ try
server_settings.max_io_thread_pool_free_size,
server_settings.io_thread_pool_queue_size);
BackupsIOThreadPool::initialize(
server_settings.max_backups_io_thread_pool_size,
server_settings.max_backups_io_thread_pool_free_size,
server_settings.backups_io_thread_pool_queue_size);
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
{

View File

@ -44,10 +44,11 @@ private:
friend class RoleCache;
explicit EnabledRoles(const Params & params_);
void setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> & info_, scope_guard * notifications);
const Params params;
/// Called by RoleCache to store `EnabledRolesInfo` in this `EnabledRoles` after the calculation is done.
void setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> & info_, scope_guard * notifications);
std::shared_ptr<const EnabledRolesInfo> info;
mutable std::mutex info_mutex;

View File

@ -57,7 +57,9 @@ namespace
RoleCache::RoleCache(const AccessControl & access_control_)
: access_control(access_control_), cache(600000 /* 10 minutes */) {}
: access_control(access_control_), cache(600000 /* 10 minutes */)
{
}
RoleCache::~RoleCache() = default;
@ -70,18 +72,18 @@ RoleCache::getEnabledRoles(const std::vector<UUID> & roles, const std::vector<UU
EnabledRoles::Params params;
params.current_roles.insert(roles.begin(), roles.end());
params.current_roles_with_admin_option.insert(roles_with_admin_option.begin(), roles_with_admin_option.end());
auto it = enabled_roles.find(params);
if (it != enabled_roles.end())
auto it = enabled_roles_by_params.find(params);
if (it != enabled_roles_by_params.end())
{
auto from_cache = it->second.lock();
if (from_cache)
return from_cache;
enabled_roles.erase(it);
if (auto enabled_roles = it->second.enabled_roles.lock())
return enabled_roles;
enabled_roles_by_params.erase(it);
}
auto res = std::shared_ptr<EnabledRoles>(new EnabledRoles(params));
collectEnabledRoles(*res, nullptr);
enabled_roles.emplace(std::move(params), res);
SubscriptionsOnRoles subscriptions_on_roles;
collectEnabledRoles(*res, subscriptions_on_roles, nullptr);
enabled_roles_by_params.emplace(std::move(params), EnabledRolesWithSubscriptions{res, std::move(subscriptions_on_roles)});
return res;
}
@ -90,21 +92,23 @@ void RoleCache::collectEnabledRoles(scope_guard * notifications)
{
/// `mutex` is already locked.
for (auto i = enabled_roles.begin(), e = enabled_roles.end(); i != e;)
for (auto i = enabled_roles_by_params.begin(), e = enabled_roles_by_params.end(); i != e;)
{
auto elem = i->second.lock();
if (!elem)
i = enabled_roles.erase(i);
auto & item = i->second;
if (auto enabled_roles = item.enabled_roles.lock())
{
collectEnabledRoles(*enabled_roles, item.subscriptions_on_roles, notifications);
++i;
}
else
{
collectEnabledRoles(*elem, notifications);
++i;
i = enabled_roles_by_params.erase(i);
}
}
}
void RoleCache::collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifications)
void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsOnRoles & subscriptions_on_roles, scope_guard * notifications)
{
/// `mutex` is already locked.
@ -112,43 +116,57 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifi
auto new_info = std::make_shared<EnabledRolesInfo>();
boost::container::flat_set<UUID> skip_ids;
auto get_role_function = [this](const UUID & id) { return getRole(id); };
/// We need to collect and keep not only enabled roles but also subscriptions for them to be able to recalculate EnabledRolesInfo when some of the roles change.
SubscriptionsOnRoles new_subscriptions_on_roles;
new_subscriptions_on_roles.reserve(subscriptions_on_roles.size());
for (const auto & current_role : enabled.params.current_roles)
auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); };
for (const auto & current_role : enabled_roles.params.current_roles)
collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false);
for (const auto & current_role : enabled.params.current_roles_with_admin_option)
for (const auto & current_role : enabled_roles.params.current_roles_with_admin_option)
collectRoles(*new_info, skip_ids, get_role_function, current_role, true, true);
/// Remove duplicates from `subscriptions_on_roles`.
std::sort(new_subscriptions_on_roles.begin(), new_subscriptions_on_roles.end());
new_subscriptions_on_roles.erase(std::unique(new_subscriptions_on_roles.begin(), new_subscriptions_on_roles.end()), new_subscriptions_on_roles.end());
subscriptions_on_roles = std::move(new_subscriptions_on_roles);
/// Collect data from the collected roles.
enabled.setRolesInfo(new_info, notifications);
enabled_roles.setRolesInfo(new_info, notifications);
}
RolePtr RoleCache::getRole(const UUID & role_id)
RolePtr RoleCache::getRole(const UUID & role_id, SubscriptionsOnRoles & subscriptions_on_roles)
{
/// `mutex` is already locked.
auto role_from_cache = cache.get(role_id);
if (role_from_cache)
{
subscriptions_on_roles.emplace_back(role_from_cache->second);
return role_from_cache->first;
}
auto subscription = access_control.subscribeForChanges(role_id,
[this, role_id](const UUID &, const AccessEntityPtr & entity)
auto on_role_changed_or_removed = [this, role_id](const UUID &, const AccessEntityPtr & entity)
{
auto changed_role = entity ? typeid_cast<RolePtr>(entity) : nullptr;
if (changed_role)
roleChanged(role_id, changed_role);
else
roleRemoved(role_id);
});
};
auto subscription_on_role = std::make_shared<scope_guard>(access_control.subscribeForChanges(role_id, on_role_changed_or_removed));
auto role = access_control.tryRead<Role>(role_id);
if (role)
{
auto cache_value = Poco::SharedPtr<std::pair<RolePtr, scope_guard>>(
new std::pair<RolePtr, scope_guard>{role, std::move(subscription)});
auto cache_value = Poco::SharedPtr<std::pair<RolePtr, std::shared_ptr<scope_guard>>>(
new std::pair<RolePtr, std::shared_ptr<scope_guard>>{role, subscription_on_role});
cache.add(role_id, cache_value);
subscriptions_on_roles.emplace_back(subscription_on_role);
return role;
}
@ -162,12 +180,17 @@ void RoleCache::roleChanged(const UUID & role_id, const RolePtr & changed_role)
scope_guard notifications;
std::lock_guard lock{mutex};
auto role_from_cache = cache.get(role_id);
if (!role_from_cache)
return;
if (role_from_cache)
{
/// We update the role stored in a cache entry only if that entry has not expired yet.
role_from_cache->first = changed_role;
cache.update(role_id, role_from_cache);
collectEnabledRoles(&notifications);
}
/// An enabled role for some users has been changed, we need to recalculate the access rights.
collectEnabledRoles(&notifications); /// collectEnabledRoles() must be called with the `mutex` locked.
}
@ -177,8 +200,12 @@ void RoleCache::roleRemoved(const UUID & role_id)
scope_guard notifications;
std::lock_guard lock{mutex};
/// If a cache entry with the role has expired already, that remove() will do nothing.
cache.remove(role_id);
collectEnabledRoles(&notifications);
/// An enabled role for some users has been removed, we need to recalculate the access rights.
collectEnabledRoles(&notifications); /// collectEnabledRoles() must be called with the `mutex` locked.
}
}

View File

@ -24,15 +24,29 @@ public:
const std::vector<UUID> & current_roles_with_admin_option);
private:
void collectEnabledRoles(scope_guard * notifications);
void collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifications);
RolePtr getRole(const UUID & role_id);
using SubscriptionsOnRoles = std::vector<std::shared_ptr<scope_guard>>;
void collectEnabledRoles(scope_guard * notifications) TSA_REQUIRES(mutex);
void collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsOnRoles & subscriptions_on_roles, scope_guard * notifications) TSA_REQUIRES(mutex);
RolePtr getRole(const UUID & role_id, SubscriptionsOnRoles & subscriptions_on_roles) TSA_REQUIRES(mutex);
void roleChanged(const UUID & role_id, const RolePtr & changed_role);
void roleRemoved(const UUID & role_id);
const AccessControl & access_control;
Poco::AccessExpireCache<UUID, std::pair<RolePtr, scope_guard>> cache;
std::map<EnabledRoles::Params, std::weak_ptr<EnabledRoles>> enabled_roles;
Poco::AccessExpireCache<UUID, std::pair<RolePtr, std::shared_ptr<scope_guard>>> TSA_GUARDED_BY(mutex) cache;
struct EnabledRolesWithSubscriptions
{
std::weak_ptr<EnabledRoles> enabled_roles;
/// We need to keep subscriptions for all enabled roles to be able to recalculate EnabledRolesInfo when some of the roles change.
/// `cache` also keeps subscriptions but that's not enough because values can be purged from the `cache` anytime.
SubscriptionsOnRoles subscriptions_on_roles;
};
std::map<EnabledRoles::Params, EnabledRolesWithSubscriptions> TSA_GUARDED_BY(mutex) enabled_roles_by_params;
mutable std::mutex mutex;
};

View File

@ -3,6 +3,7 @@
#include <Core/NamesAndTypes.h>
#include <Analyzer/IQueryTreeNode.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
@ -117,6 +118,11 @@ public:
return column.type;
}
void convertToNullable() override
{
column.type = makeNullableSafe(column.type);
}
void dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const override;
protected:

View File

@ -99,7 +99,7 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
buffer << ", function_type: " << function_type;
if (function)
buffer << ", result_type: " + function->getResultType()->getName();
buffer << ", result_type: " + getResultType()->getName();
const auto & parameters = getParameters();
if (!parameters.getNodes().empty())
@ -177,6 +177,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
*/
result_function->function = function;
result_function->kind = kind;
result_function->wrap_with_nullable = wrap_with_nullable;
return result_function;
}

View File

@ -8,6 +8,7 @@
#include <Common/typeid_cast.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/IResolvedFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <Functions/IFunction.h>
namespace DB
@ -187,7 +188,16 @@ public:
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Function node with name '{}' is not resolved",
function_name);
return function->getResultType();
auto type = function->getResultType();
if (wrap_with_nullable)
return makeNullableSafe(type);
return type;
}
void convertToNullable() override
{
chassert(kind == FunctionKind::ORDINARY);
wrap_with_nullable = true;
}
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
@ -205,6 +215,7 @@ private:
String function_name;
FunctionKind kind = FunctionKind::UNKNOWN;
IResolvedFunctionPtr function;
bool wrap_with_nullable = false;
static constexpr size_t parameters_child_index = 0;
static constexpr size_t arguments_child_index = 1;

View File

@ -90,6 +90,11 @@ public:
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getResultType is not supported for {} query node", getNodeTypeName());
}
virtual void convertToNullable()
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method convertToNullable is not supported for {} query node", getNodeTypeName());
}
struct CompareOptions
{
bool compare_aliases = true;

View File

@ -1,3 +1,5 @@
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
@ -8,15 +10,15 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/LambdaNode.h>
#include "ArrayExistsToHasPass.h"
namespace DB
{
namespace
{
class RewriteArrayExistsToHasVisitor : public InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>
{
public:
class RewriteArrayExistsToHasVisitor : public InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>;
using Base::Base;
@ -25,54 +27,68 @@ namespace
if (!getSettings().optimize_rewrite_array_exists_to_has)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "arrayExists")
auto * array_exists_function_node = node->as<FunctionNode>();
if (!array_exists_function_node || array_exists_function_node->getFunctionName() != "arrayExists")
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.size() != 2)
auto & array_exists_function_arguments_nodes = array_exists_function_node->getArguments().getNodes();
if (array_exists_function_arguments_nodes.size() != 2)
return;
/// lambda function must be like: x -> x = elem
auto * lambda_node = function_arguments_nodes[0]->as<LambdaNode>();
auto * lambda_node = array_exists_function_arguments_nodes[0]->as<LambdaNode>();
if (!lambda_node)
return;
auto & lambda_arguments_nodes = lambda_node->getArguments().getNodes();
if (lambda_arguments_nodes.size() != 1)
return;
auto * column_node = lambda_arguments_nodes[0]->as<ColumnNode>();
const auto & lambda_argument_column_node = lambda_arguments_nodes[0];
if (lambda_argument_column_node->getNodeType() != QueryTreeNodeType::COLUMN)
return;
auto * filter_node = lambda_node->getExpression()->as<FunctionNode>();
if (!filter_node || filter_node->getFunctionName() != "equals")
return;
auto filter_arguments_nodes = filter_node->getArguments().getNodes();
const auto & filter_arguments_nodes = filter_node->getArguments().getNodes();
if (filter_arguments_nodes.size() != 2)
return;
ColumnNode * filter_column_node = nullptr;
if (filter_arguments_nodes[1]->as<ConstantNode>() && (filter_column_node = filter_arguments_nodes[0]->as<ColumnNode>())
&& filter_column_node->getColumnName() == column_node->getColumnName())
const auto & filter_lhs_argument_node = filter_arguments_nodes[0];
auto filter_lhs_argument_node_type = filter_lhs_argument_node->getNodeType();
const auto & filter_rhs_argument_node = filter_arguments_nodes[1];
auto filter_rhs_argument_node_type = filter_rhs_argument_node->getNodeType();
QueryTreeNodePtr has_constant_element_argument;
if (filter_lhs_argument_node_type == QueryTreeNodeType::COLUMN &&
filter_rhs_argument_node_type == QueryTreeNodeType::CONSTANT &&
filter_lhs_argument_node->isEqual(*lambda_argument_column_node))
{
/// Rewrite arrayExists(x -> x = elem, arr) -> has(arr, elem)
function_arguments_nodes[0] = std::move(function_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(filter_arguments_nodes[1]);
function_node->resolveAsFunction(
FunctionFactory::instance().get("has", getContext())->build(function_node->getArgumentColumns()));
has_constant_element_argument = filter_rhs_argument_node;
}
else if (
filter_arguments_nodes[0]->as<ConstantNode>() && (filter_column_node = filter_arguments_nodes[1]->as<ColumnNode>())
&& filter_column_node->getColumnName() == column_node->getColumnName())
else if (filter_lhs_argument_node_type == QueryTreeNodeType::CONSTANT &&
filter_rhs_argument_node_type == QueryTreeNodeType::COLUMN &&
filter_rhs_argument_node->isEqual(*lambda_argument_column_node))
{
/// Rewrite arrayExists(x -> elem = x, arr) -> has(arr, elem)
function_arguments_nodes[0] = std::move(function_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(filter_arguments_nodes[0]);
function_node->resolveAsFunction(
FunctionFactory::instance().get("has", getContext())->build(function_node->getArgumentColumns()));
has_constant_element_argument = filter_lhs_argument_node;
}
else
{
return;
}
};
auto has_function = FunctionFactory::instance().get("has", getContext());
array_exists_function_arguments_nodes[0] = std::move(array_exists_function_arguments_nodes[1]);
array_exists_function_arguments_nodes[1] = std::move(has_constant_element_argument);
array_exists_function_node->resolveAsFunction(has_function->build(array_exists_function_node->getArgumentColumns()));
}
};
}

View File

@ -4,8 +4,15 @@
namespace DB
{
/// Rewrite possible 'arrayExists(func, arr)' to 'has(arr, elem)' to improve performance
/// arrayExists(x -> x = 1, arr) -> has(arr, 1)
/** Rewrite possible 'arrayExists(func, arr)' to 'has(arr, elem)' to improve performance.
*
* Example: SELECT arrayExists(x -> x = 1, arr);
* Result: SELECT has(arr, 1);
*
* Example: SELECT arrayExists(x -> 1 = x, arr);
* Result: SELECT has(arr, 1);
*/
class RewriteArrayExistsToHasPass final : public IQueryTreePass
{
public:
@ -15,4 +22,5 @@ public:
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -22,8 +22,7 @@ public:
void visitImpl(QueryTreeNodePtr & node)
{
const auto & context = getContext();
if (!context->getSettingsRef().final)
if (!getSettings().final)
return;
const auto * query_node = node->as<QueryNode>();

View File

@ -0,0 +1,237 @@
#include <Analyzer/Passes/LogicalExpressionOptimizerPass.h>
#include <Functions/FunctionFactory.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/HashUtils.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext<LogicalExpressionOptimizerVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<LogicalExpressionOptimizerVisitor>;
explicit LogicalExpressionOptimizerVisitor(ContextPtr context)
: Base(std::move(context))
{}
void visitImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
if (function_node->getFunctionName() == "or")
{
tryReplaceOrEqualsChainWithIn(node);
return;
}
if (function_node->getFunctionName() == "and")
{
tryReplaceAndEqualsChainsWithConstant(node);
return;
}
}
private:
void tryReplaceAndEqualsChainsWithConstant(QueryTreeNodePtr & node)
{
auto & function_node = node->as<FunctionNode &>();
assert(function_node.getFunctionName() == "and");
if (function_node.getResultType()->isNullable())
return;
QueryTreeNodes and_operands;
QueryTreeNodePtrWithHashMap<const ConstantNode *> node_to_constants;
for (const auto & argument : function_node.getArguments())
{
auto * argument_function = argument->as<FunctionNode>();
if (!argument_function || argument_function->getFunctionName() != "equals")
{
and_operands.push_back(argument);
continue;
}
const auto & equals_arguments = argument_function->getArguments().getNodes();
const auto & lhs = equals_arguments[0];
const auto & rhs = equals_arguments[1];
const auto has_and_with_different_constant = [&](const QueryTreeNodePtr & expression, const ConstantNode * constant)
{
if (auto it = node_to_constants.find(expression); it != node_to_constants.end())
{
if (!it->second->isEqual(*constant))
return true;
}
else
{
node_to_constants.emplace(expression, constant);
and_operands.push_back(argument);
}
return false;
};
bool collapse_to_false = false;
if (const auto * lhs_literal = lhs->as<ConstantNode>())
collapse_to_false = has_and_with_different_constant(rhs, lhs_literal);
else if (const auto * rhs_literal = rhs->as<ConstantNode>())
collapse_to_false = has_and_with_different_constant(lhs, rhs_literal);
else
and_operands.push_back(argument);
if (collapse_to_false)
{
auto false_value = std::make_shared<ConstantValue>(0u, function_node.getResultType());
auto false_node = std::make_shared<ConstantNode>(std::move(false_value));
node = std::move(false_node);
return;
}
}
if (and_operands.size() == 1)
{
/// AND operator can have UInt8 or bool as its type.
/// bool is used if at least one operand is a bool constant.
/// Because we reduce the number of operands here by eliminating the same equality checks,
/// the only situation in which we can end up here is an AND check where all the equality checks are the same, so we know the type is UInt8.
/// Otherwise, we will have > 1 operands and we don't have to do anything.
assert(!function_node.getResultType()->isNullable() && and_operands[0]->getResultType()->equals(*function_node.getResultType()));
node = std::move(and_operands[0]);
return;
}
auto and_function_resolver = FunctionFactory::instance().get("and", getContext());
function_node.getArguments().getNodes() = std::move(and_operands);
function_node.resolveAsFunction(and_function_resolver);
}
void tryReplaceOrEqualsChainWithIn(QueryTreeNodePtr & node)
{
auto & function_node = node->as<FunctionNode &>();
assert(function_node.getFunctionName() == "or");
QueryTreeNodes or_operands;
QueryTreeNodePtrWithHashMap<QueryTreeNodes> node_to_equals_functions;
QueryTreeNodePtrWithHashMap<QueryTreeNodeConstRawPtrWithHashSet> node_to_constants;
for (const auto & argument : function_node.getArguments())
{
auto * argument_function = argument->as<FunctionNode>();
if (!argument_function || argument_function->getFunctionName() != "equals")
{
or_operands.push_back(argument);
continue;
}
/// collect all equality checks (x = value)
const auto & equals_arguments = argument_function->getArguments().getNodes();
const auto & lhs = equals_arguments[0];
const auto & rhs = equals_arguments[1];
const auto add_equals_function_if_not_present = [&](const auto & expression_node, const ConstantNode * constant)
{
auto & constant_set = node_to_constants[expression_node];
if (!constant_set.contains(constant))
{
constant_set.insert(constant);
node_to_equals_functions[expression_node].push_back(argument);
}
};
if (const auto * lhs_literal = lhs->as<ConstantNode>())
add_equals_function_if_not_present(rhs, lhs_literal);
else if (const auto * rhs_literal = rhs->as<ConstantNode>())
add_equals_function_if_not_present(lhs, rhs_literal);
else
or_operands.push_back(argument);
}
auto in_function_resolver = FunctionFactory::instance().get("in", getContext());
for (auto & [expression, equals_functions] : node_to_equals_functions)
{
const auto & settings = getSettings();
if (equals_functions.size() < settings.optimize_min_equality_disjunction_chain_length && !expression.node->getResultType()->lowCardinality())
{
std::move(equals_functions.begin(), equals_functions.end(), std::back_inserter(or_operands));
continue;
}
Tuple args;
args.reserve(equals_functions.size());
/// first we create tuple from RHS of equals functions
for (const auto & equals : equals_functions)
{
const auto * equals_function = equals->as<FunctionNode>();
assert(equals_function && equals_function->getFunctionName() == "equals");
const auto & equals_arguments = equals_function->getArguments().getNodes();
if (const auto * rhs_literal = equals_arguments[1]->as<ConstantNode>())
{
args.push_back(rhs_literal->getValue());
}
else
{
const auto * lhs_literal = equals_arguments[0]->as<ConstantNode>();
assert(lhs_literal);
args.push_back(lhs_literal->getValue());
}
}
auto rhs_node = std::make_shared<ConstantNode>(std::move(args));
auto in_function = std::make_shared<FunctionNode>("in");
QueryTreeNodes in_arguments;
in_arguments.reserve(2);
in_arguments.push_back(expression.node);
in_arguments.push_back(std::move(rhs_node));
in_function->getArguments().getNodes() = std::move(in_arguments);
in_function->resolveAsFunction(in_function_resolver);
or_operands.push_back(std::move(in_function));
}
if (or_operands.size() == 1)
{
/// if the result type of operand is the same as the result type of OR
/// we can replace OR with the operand
if (or_operands[0]->getResultType()->equals(*function_node.getResultType()))
{
assert(!function_node.getResultType()->isNullable());
node = std::move(or_operands[0]);
return;
}
/// otherwise add a stub 0 to make OR correct
or_operands.push_back(std::make_shared<ConstantNode>(static_cast<UInt8>(0)));
}
auto or_function_resolver = FunctionFactory::instance().get("or", getContext());
function_node.getArguments().getNodes() = std::move(or_operands);
function_node.resolveAsFunction(or_function_resolver);
}
};
void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
LogicalExpressionOptimizerVisitor visitor(std::move(context));
visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,82 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/**
* This pass tries to do optimizations on logical expressions:
*
* 1. Replaces chains of equality functions inside an OR with a single IN operator.
* The replacement is done if:
* - one of the operands of the equality function is a constant
* - the length of the chain is at least 'optimize_min_equality_disjunction_chain_length' OR the expression has a LowCardinality type
*
* E.g. (optimize_min_equality_disjunction_chain_length = 2)
* -------------------------------
* SELECT *
* FROM table
* WHERE a = 1 OR b = 'test' OR a = 2;
*
* will be transformed into
*
* SELECT *
* FROM TABLE
* WHERE b = 'test' OR a IN (1, 2);
* -------------------------------
*
* 2. Removes duplicate OR checks
* -------------------------------
* SELECT *
* FROM table
* WHERE a = 1 OR b = 'test' OR a = 1;
*
* will be transformed into
*
* SELECT *
* FROM TABLE
* WHERE a = 1 OR b = 'test';
* -------------------------------
*
* 3. Replaces AND chains with a single constant.
* The replacement is done if:
* - one of the operands of the equality function is a constant
* - the constants are different for the same expression
* -------------------------------
* SELECT *
* FROM table
* WHERE a = 1 AND b = 'test' AND a = 2;
*
* will be transformed into
*
* SELECT *
* FROM TABLE
* WHERE 0;
* -------------------------------
*
* 4. Removes duplicate AND checks
* -------------------------------
* SELECT *
* FROM table
* WHERE a = 1 AND b = 'test' AND a = 1;
*
* will be transformed into
*
* SELECT *
* FROM TABLE
* WHERE a = 1 AND b = 'test';
* -------------------------------
*/
class LogicalExpressionOptimizerPass final : public IQueryTreePass
{
public:
String getName() override { return "LogicalExpressionOptimizer"; }
String getDescription() override { return "Transform equality chain to a single IN function or a constant if possible"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -199,7 +199,6 @@ namespace ErrorCodes
* TODO: SELECT (compound_expression).*, (compound_expression).COLUMNS are not supported on parser level.
* TODO: SELECT a.b.c.*, a.b.c.COLUMNS. Qualified matcher where identifier size is greater than 2 are not supported on parser level.
* TODO: Support function identifier resolve from parent query scope, if lambda in parent scope does not capture any columns.
* TODO: Support group_by_use_nulls.
* TODO: Scalar subqueries cache.
*/
@ -472,6 +471,12 @@ public:
alias_name_to_expressions[node_alias].push_back(node);
}
if (const auto * function = node->as<FunctionNode>())
{
if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->getFunctionName()))
++aggregate_functions_counter;
}
expressions.emplace_back(node);
}
@ -490,6 +495,12 @@ public:
alias_name_to_expressions.erase(it);
}
if (const auto * function = top_expression->as<FunctionNode>())
{
if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->getFunctionName()))
--aggregate_functions_counter;
}
expressions.pop_back();
}
@ -508,6 +519,11 @@ public:
return alias_name_to_expressions.contains(alias);
}
bool hasAggregateFunction() const
{
return aggregate_functions_counter > 0;
}
QueryTreeNodePtr getExpressionWithAlias(const std::string & alias) const
{
auto expression_it = alias_name_to_expressions.find(alias);
@ -554,6 +570,7 @@ public:
private:
QueryTreeNodes expressions;
size_t aggregate_functions_counter = 0;
std::unordered_map<std::string, QueryTreeNodes> alias_name_to_expressions;
};
@ -686,7 +703,11 @@ struct IdentifierResolveScope
if (auto * union_node = scope_node->as<UnionNode>())
context = union_node->getContext();
else if (auto * query_node = scope_node->as<QueryNode>())
{
context = query_node->getContext();
group_by_use_nulls = context->getSettingsRef().group_by_use_nulls &&
(query_node->isGroupByWithGroupingSets() || query_node->isGroupByWithRollup() || query_node->isGroupByWithCube());
}
}
QueryTreeNodePtr scope_node;
@ -734,9 +755,14 @@ struct IdentifierResolveScope
/// Table expression node to data
std::unordered_map<QueryTreeNodePtr, TableExpressionData> table_expression_node_to_data;
QueryTreeNodePtrWithHashSet nullable_group_by_keys;
/// Use identifier lookup to result cache
bool use_identifier_lookup_to_result_cache = true;
/// Apply nullability to aggregation keys
bool group_by_use_nulls = false;
/// JOINs count
size_t joins_count = 0;
@ -5407,10 +5433,18 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
}
}
if (node
&& scope.nullable_group_by_keys.contains(node)
&& !scope.expressions_in_resolve_process_stack.hasAggregateFunction())
{
node = node->clone();
node->convertToNullable();
}
/** Update aliases after expression node was resolved.
* Do not update node in alias table if we resolve it for duplicate alias.
*/
if (!node_alias.empty() && use_alias_table)
if (!node_alias.empty() && use_alias_table && !scope.group_by_use_nulls)
{
auto it = scope.alias_name_to_expression_node.find(node_alias);
if (it != scope.alias_name_to_expression_node.end())
@ -6418,9 +6452,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
auto & query_node_typed = query_node->as<QueryNode &>();
const auto & settings = scope.context->getSettingsRef();
if (settings.group_by_use_nulls)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "GROUP BY use nulls is not supported");
bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube();
if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.isGroupByWithTotals())
@ -6556,16 +6587,11 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor);
}
if (!scope.group_by_use_nulls)
scope.use_identifier_lookup_to_result_cache = true;
/// Resolve query node sections.
auto projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope);
if (query_node_typed.getProjection().getNodes().empty())
throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED,
"Empty list of columns in projection. In scope {}",
scope.scope_node->formatASTForErrorMessage());
if (query_node_typed.hasWith())
resolveExpressionNodeList(query_node_typed.getWithNode(), scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/);
@ -6586,6 +6612,15 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
}
if (scope.group_by_use_nulls)
{
for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes())
{
for (const auto & group_by_elem : grouping_set->as<ListNode>()->getNodes())
scope.nullable_group_by_keys.insert(group_by_elem);
}
}
}
else
{
@ -6593,6 +6628,12 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope);
resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
if (scope.group_by_use_nulls)
{
for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes())
scope.nullable_group_by_keys.insert(group_by_elem);
}
}
}
@ -6645,6 +6686,12 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
convertLimitOffsetExpression(query_node_typed.getOffset(), "OFFSET", scope);
}
auto projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope);
if (query_node_typed.getProjection().getNodes().empty())
throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED,
"Empty list of columns in projection. In scope {}",
scope.scope_node->formatASTForErrorMessage());
/** Resolve nodes with duplicate aliases.
* Table expressions cannot have duplicate aliases.
*
@ -6708,7 +6755,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
"ARRAY JOIN",
"in PREWHERE");
validateAggregates(query_node);
validateAggregates(query_node, { .group_by_use_nulls = scope.group_by_use_nulls });
/** WITH section can be safely removed, because WITH section only can provide aliases to query expressions
* and CTE for other sections to use.

View File

@ -6,6 +6,9 @@ namespace DB
{
/** Rewrite _shard_num column into shardNum() function.
*
* Example: SELECT _shard_num FROM distributed_table;
* Result: SELECT shardNum() FROM distributed_table;
*/
class ShardNumColumnToFunctionPass final : public IQueryTreePass
{

View File

@ -355,21 +355,67 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
if (select_limit_by)
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context);
/// Combine limit expression with limit setting
/// Combine limit expression with limit and offset settings into final limit expression
/// The sequence of application is the following - offset expression, limit expression, offset setting, limit setting.
/// Since offset setting is applied after limit expression, but we want to transfer settings into expression
/// we must decrease limit expression by offset setting and then add offset setting to offset expression.
/// select_limit - limit expression
/// limit - limit setting
/// offset - offset setting
///
/// if select_limit
/// -- if offset >= select_limit (expr 0)
/// then (0) (0 rows)
/// -- else if limit > 0 (expr 1)
/// then min(select_limit - offset, limit) (expr 2)
/// -- else
/// then (select_limit - offset) (expr 3)
/// else if limit > 0
/// then limit
///
/// offset = offset + of_expr
auto select_limit = select_query_typed.limitLength();
if (select_limit && limit)
if (select_limit)
{
auto function_node = std::make_shared<FunctionNode>("least");
function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
/// Shortcut
if (offset == 0 && limit == 0)
{
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
}
else
{
/// expr 3
auto expr_3 = std::make_shared<FunctionNode>("minus");
expr_3->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
expr_3->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
/// expr 2
auto expr_2 = std::make_shared<FunctionNode>("least");
expr_2->getArguments().getNodes().push_back(expr_3->clone());
expr_2->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
/// expr 0
auto expr_0 = std::make_shared<FunctionNode>("greaterOrEquals");
expr_0->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
expr_0->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
/// expr 1
auto expr_1 = std::make_shared<ConstantNode>(limit > 0);
auto function_node = std::make_shared<FunctionNode>("multiIf");
function_node->getArguments().getNodes().push_back(expr_0);
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(0));
function_node->getArguments().getNodes().push_back(expr_1);
function_node->getArguments().getNodes().push_back(expr_2);
function_node->getArguments().getNodes().push_back(expr_3);
current_query_tree->getLimit() = std::move(function_node);
}
else if (limit)
}
else if (limit > 0)
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
else if (select_limit)
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
/// Combine offset expression with offset setting
/// Combine offset expression with offset setting into final offset expression
auto select_offset = select_query_typed.limitOffset();
if (select_offset && offset)
{

View File

@ -38,6 +38,7 @@
#include <Analyzer/Passes/AutoFinalOnQueryPass.h>
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
#include <Analyzer/Passes/LogicalExpressionOptimizerPass.h>
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
@ -147,7 +148,6 @@ private:
/** ClickHouse query tree pass manager.
*
* TODO: Support logical expressions optimizer.
* TODO: Support setting convert_query_to_cnf.
* TODO: Support setting optimize_using_constraints.
* TODO: Support setting optimize_substitute_columns.
@ -262,6 +262,8 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<ConvertOrLikeChainPass>());
manager.addPass(std::make_unique<LogicalExpressionOptimizerPass>());
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique<CrossToInnerJoinPass>());

View File

@ -105,7 +105,7 @@ private:
const QueryTreeNodePtr & query_node;
};
void validateAggregates(const QueryTreeNodePtr & query_node)
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params)
{
const auto & query_node_typed = query_node->as<QueryNode &>();
auto join_tree_node_type = query_node_typed.getJoinTree()->getNodeType();
@ -182,7 +182,9 @@ void validateAggregates(const QueryTreeNodePtr & query_node)
if (grouping_set_key->as<ConstantNode>())
continue;
group_by_keys_nodes.push_back(grouping_set_key);
group_by_keys_nodes.push_back(grouping_set_key->clone());
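/// When group_by_use_nulls is enabled the GROUP BY keys become Nullable in the projection,
/// so the validator has to compare against nullable copies of the keys.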
if (params.group_by_use_nulls)
group_by_keys_nodes.back()->convertToNullable();
}
}
else
@ -190,7 +192,9 @@ void validateAggregates(const QueryTreeNodePtr & query_node)
if (node->as<ConstantNode>())
continue;
group_by_keys_nodes.push_back(node);
group_by_keys_nodes.push_back(node->clone());
if (params.group_by_use_nulls)
group_by_keys_nodes.back()->convertToNullable();
}
}

View File

@ -5,6 +5,11 @@
namespace DB
{
struct ValidationParams
{
bool group_by_use_nulls;
};
/** Validate aggregates in query node.
*
* 1. Check that there are no aggregate functions or GROUPING function in JOIN TREE, WHERE, PREWHERE, or inside other aggregate functions.
@ -15,7 +20,7 @@ namespace DB
* PROJECTION.
* 5. Throws exception if there is GROUPING SETS or ROLLUP or CUBE or WITH TOTALS without aggregation.
*/
void validateAggregates(const QueryTreeNodePtr & query_node);
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.

View File

@ -6,7 +6,7 @@
#include <IO/WriteHelpers.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Backups/BackupCoordinationStage.h>
@ -166,13 +166,25 @@ namespace
}
BackupCoordinationRemote::BackupCoordinationRemote(
const String & root_zookeeper_path_, const String & backup_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_)
: root_zookeeper_path(root_zookeeper_path_)
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
bool is_internal_)
: keeper_settings(keeper_settings_)
, root_zookeeper_path(root_zookeeper_path_)
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
, backup_uuid(backup_uuid_)
, get_zookeeper(get_zookeeper_)
, is_internal(is_internal_)
{
zookeeper_retries_info = ZooKeeperRetriesInfo(
"BackupCoordinationRemote",
&Poco::Logger::get("BackupCoordinationRemote"),
keeper_settings.keeper_max_retries,
keeper_settings.keeper_retry_initial_backoff_ms,
keeper_settings.keeper_retry_max_backoff_ms);
createRootNodes();
stage_sync.emplace(
zookeeper_path + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination"));
@ -486,19 +498,131 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
{
auto zk = getZooKeeper();
std::vector<FileInfo> file_infos;
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
for (const String & escaped_name : escaped_names)
/// There can be a huge number of files inside /file_names or /file_infos,
/// so we use MultiRead requests to process them in batches.
/// Using [Zoo]Keeper retries here is safe, because this function is called
/// at the end, after the actual copying has finished.
auto split_vector = [](Strings && vec, size_t max_batch_size) -> std::vector<Strings>
{
String size_and_checksum = zk->get(zookeeper_path + "/file_names/" + escaped_name);
UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first;
FileInfo file_info;
if (size) /// we don't keep FileInfos for empty files
file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info.file_name = unescapeForFileName(escaped_name);
file_infos.emplace_back(std::move(file_info));
std::vector<Strings> result;
size_t left_border = 0;
auto move_to_result = [&](auto && begin, auto && end)
{
auto batch = Strings();
batch.reserve(max_batch_size);
std::move(begin, end, std::back_inserter(batch));
result.push_back(std::move(batch));
};
if (max_batch_size == 0)
{
move_to_result(vec.begin(), vec.end());
return result;
}
for (size_t pos = 0; pos < vec.size(); ++pos)
{
if (pos >= left_border + max_batch_size)
{
move_to_result(vec.begin() + left_border, vec.begin() + pos);
left_border = pos;
}
}
if (vec.begin() + left_border != vec.end())
move_to_result(vec.begin() + left_border, vec.end());
return result;
};
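/// A minimal sketch of the batching above: split_vector({"a", "b", "c", "d", "e"}, 2)
/// returns {{"a", "b"}, {"c", "d"}, {"e"}}, while max_batch_size == 0 keeps all names in a single batch.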
std::vector<Strings> batched_escaped_names;
{
ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getChildren", zookeeper_retries_info);
retries_ctl.retryLoop([&]()
{
auto zk = getZooKeeper();
batched_escaped_names = split_vector(zk->getChildren(zookeeper_path + "/file_names"), keeper_settings.batch_size_for_keeper_multiread);
});
}
std::vector<FileInfo> file_infos;
file_infos.reserve(batched_escaped_names.size());
for (auto & batch : batched_escaped_names)
{
zkutil::ZooKeeper::MultiGetResponse sizes_and_checksums;
{
Strings file_names_paths;
file_names_paths.reserve(batch.size());
for (const String & escaped_name : batch)
file_names_paths.emplace_back(zookeeper_path + "/file_names/" + escaped_name);
ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getSizesAndChecksums", zookeeper_retries_info);
retries_ctl.retryLoop([&]
{
auto zk = getZooKeeper();
sizes_and_checksums = zk->get(file_names_paths);
});
}
Strings non_empty_file_names;
Strings non_empty_file_infos_paths;
std::vector<FileInfo> non_empty_files_infos;
/// Process all files and determine which of them are empty.
/// Save the non-empty file names for further batch processing.
{
std::vector<FileInfo> empty_files_infos;
for (size_t i = 0; i < batch.size(); ++i)
{
auto file_name = batch[i];
if (sizes_and_checksums[i].error != Coordination::Error::ZOK)
throw zkutil::KeeperException(sizes_and_checksums[i].error);
const auto & size_and_checksum = sizes_and_checksums[i].data;
auto size = deserializeSizeAndChecksum(size_and_checksum).first;
if (size)
{
/// Save it for later batch processing
non_empty_file_names.emplace_back(file_name);
non_empty_file_infos_paths.emplace_back(zookeeper_path + "/file_infos/" + size_and_checksum);
continue;
}
/// File is empty
FileInfo empty_file_info;
empty_file_info.file_name = unescapeForFileName(file_name);
empty_files_infos.emplace_back(std::move(empty_file_info));
}
std::move(empty_files_infos.begin(), empty_files_infos.end(), std::back_inserter(file_infos));
}
zkutil::ZooKeeper::MultiGetResponse non_empty_file_infos_serialized;
ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getFileInfos", zookeeper_retries_info);
retries_ctl.retryLoop([&]()
{
auto zk = getZooKeeper();
non_empty_file_infos_serialized = zk->get(non_empty_file_infos_paths);
});
/// Process non empty files
for (size_t i = 0; i < non_empty_file_names.size(); ++i)
{
FileInfo file_info;
if (non_empty_file_infos_serialized[i].error != Coordination::Error::ZOK)
throw zkutil::KeeperException(non_empty_file_infos_serialized[i].error);
file_info = deserializeFileInfo(non_empty_file_infos_serialized[i].data);
file_info.file_name = unescapeForFileName(non_empty_file_names[i]);
non_empty_files_infos.emplace_back(std::move(file_info));
}
std::move(non_empty_files_infos.begin(), non_empty_files_infos.end(), std::back_inserter(file_infos));
}
return file_infos;
}
@ -604,7 +728,7 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
return false;
auto zk = getZooKeeper();
std::string backup_stage_path = zookeeper_path +"/stage";
std::string backup_stage_path = zookeeper_path + "/stage";
if (!zk->exists(root_zookeeper_path))
zk->createAncestors(root_zookeeper_path);

View File

@ -4,6 +4,7 @@
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedTables.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Storages/MergeTree/ZooKeeperRetries.h>
namespace DB
@ -16,7 +17,20 @@ constexpr size_t MAX_ZOOKEEPER_ATTEMPTS = 10;
class BackupCoordinationRemote : public IBackupCoordination
{
public:
BackupCoordinationRemote(const String & root_zookeeper_path_, const String & backup_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_);
struct BackupKeeperSettings
{
UInt64 keeper_max_retries;
UInt64 keeper_retry_initial_backoff_ms;
UInt64 keeper_retry_max_backoff_ms;
UInt64 batch_size_for_keeper_multiread;
};
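/// keeper_max_retries and the backoff values feed the ZooKeeperRetriesInfo used for [Zoo]Keeper requests,
/// while batch_size_for_keeper_multiread caps how many paths are fetched per MultiRead request.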
BackupCoordinationRemote(
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
bool is_internal_);
~BackupCoordinationRemote() override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
@ -68,12 +82,14 @@ private:
void prepareReplicatedTables() const;
void prepareReplicatedAccess() const;
const BackupKeeperSettings keeper_settings;
const String root_zookeeper_path;
const String zookeeper_path;
const String backup_uuid;
const zkutil::GetZooKeeper get_zookeeper;
const bool is_internal;
mutable ZooKeeperRetriesInfo zookeeper_retries_info;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;

View File

@ -4,7 +4,7 @@
#include <Common/quoteString.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <Interpreters/Context.h>
#include <IO/IOThreadPool.h>
#include <IO/BackupsIOThreadPool.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/WriteBufferFromS3.h>
#include <IO/HTTPHeaderEntries.h>
@ -167,7 +167,7 @@ void BackupWriterS3::copyFileNative(DiskPtr src_disk, const String & src_file_na
std::string src_bucket = object_storage->getObjectsNamespace();
auto file_path = fs::path(s3_uri.key) / dest_file_name;
copyS3File(client, src_bucket, objects[0].absolute_path, src_offset, src_size, s3_uri.bucket, file_path, request_settings, {},
threadPoolCallbackRunner<void>(IOThreadPool::get(), "BackupWriterS3"));
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
}
}
@ -175,7 +175,7 @@ void BackupWriterS3::copyDataToFile(
const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name)
{
copyDataToS3File(create_read_buffer, offset, size, client, s3_uri.bucket, fs::path(s3_uri.key) / dest_file_name, request_settings, {},
threadPoolCallbackRunner<void>(IOThreadPool::get(), "BackupWriterS3"));
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
}
BackupWriterS3::~BackupWriterS3() = default;
@ -222,7 +222,7 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
request_settings,
std::nullopt,
DBMS_DEFAULT_BUFFER_SIZE,
threadPoolCallbackRunner<void>(IOThreadPool::get(), "BackupWriterS3"));
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
}
void BackupWriterS3::removeFile(const String & file_name)

View File

@ -6,7 +6,7 @@
#include <Backups/BackupCoordinationLocal.h>
#include <Backups/BackupCoordinationRemote.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/quoteString.h>
#include <Common/XMLUtils.h>
#include <Interpreters/Context.h>
@ -16,11 +16,11 @@
#include <IO/Archives/createArchiveWriter.h>
#include <IO/ConcatSeekableReadBuffer.h>
#include <IO/HashingReadBuffer.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadHelpers.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <IO/copyData.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h>
@ -317,11 +317,19 @@ void BackupImpl::writeBackupMetadata()
{
assert(!is_internal_backup);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->setInt("version", CURRENT_BACKUP_VERSION);
config->setBool("deduplicate_files", deduplicate_files);
config->setString("timestamp", toString(LocalDateTime{timestamp}));
config->setString("uuid", toString(*uuid));
checkLockFile(true);
std::unique_ptr<WriteBuffer> out;
if (use_archives)
out = getArchiveWriter("")->writeFile(".backup");
else
out = writer->writeFile(".backup");
*out << "<config>";
*out << "<version>" << CURRENT_BACKUP_VERSION << "</version>";
*out << "<deduplicate_files>" << deduplicate_files << "</deduplicate_files>";
*out << "<timestamp>" << toString(LocalDateTime{timestamp}) << "</timestamp>";
*out << "<uuid>" << toString(*uuid) << "</uuid>";
auto all_file_infos = coordination->getAllFileInfos();
@ -336,8 +344,8 @@ void BackupImpl::writeBackupMetadata()
if (base_backup_in_use)
{
config->setString("base_backup", base_backup_info->toString());
config->setString("base_backup_uuid", toString(*base_backup_uuid));
*out << "<base_backup>" << xml << base_backup_info->toString() << "</base_backup>";
*out << "<base_backup_uuid>" << toString(*base_backup_uuid) << "</base_backup_uuid>";
}
}
@ -346,31 +354,32 @@ void BackupImpl::writeBackupMetadata()
num_entries = 0;
size_of_entries = 0;
for (size_t i = 0; i != all_file_infos.size(); ++i)
*out << "<contents>";
for (const auto & info : all_file_infos)
{
const auto & info = all_file_infos[i];
String prefix = i ? "contents.file[" + std::to_string(i) + "]." : "contents.file.";
config->setString(prefix + "name", info.file_name);
config->setUInt64(prefix + "size", info.size);
*out << "<file>";
*out << "<name>" << xml << info.file_name << "</name>";
*out << "<size>" << info.size << "</size>";
if (info.size)
{
config->setString(prefix + "checksum", hexChecksum(info.checksum));
*out << "<checksum>" << hexChecksum(info.checksum) << "</checksum>";
if (info.base_size)
{
config->setBool(prefix + "use_base", true);
*out << "<use_base>true</use_base>";
if (info.base_size != info.size)
{
config->setUInt64(prefix + "base_size", info.base_size);
config->setString(prefix + "base_checksum", hexChecksum(info.base_checksum));
*out << "<base_size>" << info.base_size << "</base_size>";
*out << "<base_checksum>" << hexChecksum(info.base_checksum) << "</base_checksum>";
}
}
if (!info.data_file_name.empty() && (info.data_file_name != info.file_name))
config->setString(prefix + "data_file", info.data_file_name);
*out << "<data_file>" << xml << info.data_file_name << "</data_file>";
if (!info.archive_suffix.empty())
config->setString(prefix + "archive_suffix", info.archive_suffix);
*out << "<archive_suffix>" << xml << info.archive_suffix << "</archive_suffix>";
if (info.pos_in_archive != static_cast<size_t>(-1))
config->setUInt64(prefix + "pos_in_archive", info.pos_in_archive);
*out << "<pos_in_archive>" << info.pos_in_archive << "</pos_in_archive>";
}
total_size += info.size;
@ -380,23 +389,16 @@ void BackupImpl::writeBackupMetadata()
++num_entries;
size_of_entries += info.size - info.base_size;
}
*out << "</file>";
}
*out << "</contents>";
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
config->save(stream);
String str = stream.str();
*out << "</config>";
checkLockFile(true);
std::unique_ptr<WriteBuffer> out;
if (use_archives)
out = getArchiveWriter("")->writeFile(".backup");
else
out = writer->writeFile(".backup");
out->write(str.data(), str.size());
out->finalize();
uncompressed_size = size_of_entries + str.size();
uncompressed_size = size_of_entries + out->count();
}

View File

@ -38,12 +38,14 @@ namespace Stage = BackupCoordinationStage;
namespace
{
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup)
std::shared_ptr<IBackupCoordination> makeBackupCoordination(std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings, String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup)
{
if (!root_zk_path.empty())
{
if (!keeper_settings.has_value())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parameter keeper_settings is empty while root_zk_path is not. This is a bug");
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<BackupCoordinationRemote>(root_zk_path, backup_uuid, get_zookeeper, is_internal_backup);
return std::make_shared<BackupCoordinationRemote>(*keeper_settings, root_zk_path, backup_uuid, get_zookeeper, is_internal_backup);
}
else
{
@ -169,7 +171,15 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
/// if it's not created here. However, to handle errors better, it is preferable to create the coordination here, because this way,
/// if an exception is thrown in startMakingBackup(), the other hosts will know about it.
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_coordination = makeBackupCoordination(root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
}
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
@ -265,10 +275,17 @@ void BackupsWorker::doBackup(
context->checkAccess(required_access);
String root_zk_path;
std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings;
ClusterPtr cluster;
if (on_cluster)
{
keeper_settings = BackupCoordinationRemote::BackupKeeperSettings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
@ -277,7 +294,7 @@ void BackupsWorker::doBackup(
/// Make a backup coordination.
if (!backup_coordination)
backup_coordination = makeBackupCoordination(root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
if (!allow_concurrent_backups && backup_coordination->hasConcurrentBackups(std::ref(num_active_backups)))
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent backups not supported, turn on setting 'allow_concurrent_backups'");

View File

@ -1,7 +1,9 @@
#pragma once
#include <Core/Types.h>
#include <optional>
#include <fmt/format.h>
#include <base/hex.h>
#include <Core/Types.h>
namespace DB
@ -85,6 +87,22 @@ public:
/// Position in the archive.
UInt64 pos_in_archive = static_cast<UInt64>(-1);
/// Note: this format doesn't allow parsing the data back.
/// It is useful only for debugging purposes.
[[ maybe_unused ]] String describe()
{
String result;
result += fmt::format("file_name: {};\n", file_name);
result += fmt::format("size: {};\n", size);
result += fmt::format("checksum: {};\n", getHexUIntLowercase(checksum));
result += fmt::format("base_size: {};\n", base_size);
result += fmt::format("base_checksum: {};\n", getHexUIntLowercase(checksum));
result += fmt::format("data_file_name: {};\n", data_file_name);
result += fmt::format("archive_suffix: {};\n", archive_suffix);
result += fmt::format("pos_in_archive: {};\n", pos_in_archive);
return result;
}
};
/// Adds file information.

View File

@ -1360,7 +1360,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
throw;
}
if (have_data_in_stdin)
if (have_data_in_stdin && !cancelled)
sendDataFromStdin(sample, columns_description_for_query, parsed_query);
}
else if (parsed_insert_query->data)
@ -1370,7 +1370,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
try
{
sendDataFrom(data_in, sample, columns_description_for_query, parsed_query, have_data_in_stdin);
if (have_data_in_stdin)
if (have_data_in_stdin && !cancelled)
sendDataFromStdin(sample, columns_description_for_query, parsed_query);
}
catch (Exception & e)
@ -1834,7 +1834,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
{
/// disable logs if expects errors
TestHint test_hint(all_queries_text);
if (test_hint.clientError() || test_hint.serverError())
if (test_hint.hasClientErrors() || test_hint.hasServerErrors())
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
}
@ -1876,17 +1876,17 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// the query ends because we failed to parse it, so we consume
// the entire line.
TestHint hint(String(this_query_begin, this_query_end - this_query_begin));
if (hint.serverError())
if (hint.hasServerErrors())
{
// Syntax errors are considered as client errors
current_exception->addMessage("\nExpected server error '{}'.", hint.serverError());
current_exception->addMessage("\nExpected server error: {}.", hint.serverErrors());
current_exception->rethrow();
}
if (hint.clientError() != current_exception->code())
if (!hint.hasExpectedClientError(current_exception->code()))
{
if (hint.clientError())
current_exception->addMessage("\nExpected client error: " + std::to_string(hint.clientError()));
if (hint.hasClientErrors())
current_exception->addMessage("\nExpected client error: {}.", hint.clientErrors());
current_exception->rethrow();
}
@ -1935,37 +1935,37 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
bool error_matches_hint = true;
if (have_error)
{
if (test_hint.serverError())
if (test_hint.hasServerErrors())
{
if (!server_exception)
{
error_matches_hint = false;
fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n",
test_hint.serverError(), full_query);
test_hint.serverErrors(), full_query);
}
else if (server_exception->code() != test_hint.serverError())
else if (!test_hint.hasExpectedServerError(server_exception->code()))
{
error_matches_hint = false;
fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n",
test_hint.serverError(), server_exception->code(), full_query);
test_hint.serverErrors(), server_exception->code(), full_query);
}
}
if (test_hint.clientError())
if (test_hint.hasClientErrors())
{
if (!client_exception)
{
error_matches_hint = false;
fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n",
test_hint.clientError(), full_query);
test_hint.clientErrors(), full_query);
}
else if (client_exception->code() != test_hint.clientError())
else if (!test_hint.hasExpectedClientError(client_exception->code()))
{
error_matches_hint = false;
fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n",
test_hint.clientError(), client_exception->code(), full_query);
test_hint.clientErrors(), client_exception->code(), full_query);
}
}
if (!test_hint.clientError() && !test_hint.serverError())
if (!test_hint.hasClientErrors() && !test_hint.hasServerErrors())
{
// No error was expected but it still occurred. This is the
// default case without test hint, doesn't need additional
@ -1975,19 +1975,19 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
}
else
{
if (test_hint.clientError())
if (test_hint.hasClientErrors())
{
error_matches_hint = false;
fmt::print(stderr,
"The query succeeded but the client error '{}' was expected (query: {}).\n",
test_hint.clientError(), full_query);
test_hint.clientErrors(), full_query);
}
if (test_hint.serverError())
if (test_hint.hasServerErrors())
{
error_matches_hint = false;
fmt::print(stderr,
"The query succeeded but the server error '{}' was expected (query: {}).\n",
test_hint.serverError(), full_query);
test_hint.serverErrors(), full_query);
}
}

View File

@ -1,32 +1,15 @@
#include "TestHint.h"
#include <charconv>
#include <string_view>
#include <Client/TestHint.h>
#include <Common/Exception.h>
#include <Common/ErrorCodes.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Parsers/Lexer.h>
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
namespace
namespace DB::ErrorCodes
{
/// Parse error as number or as a string (name of the error code const)
int parseErrorCode(DB::ReadBufferFromString & in)
{
int code = -1;
String code_name;
auto * pos = in.position();
tryReadText(code, in);
if (pos != in.position())
{
return code;
}
/// Try parse as string
readStringUntilWhitespace(code_name, in);
return DB::ErrorCodes::getErrorCodeByName(code_name);
}
extern const int CANNOT_PARSE_TEXT;
}
namespace DB
@ -60,8 +43,8 @@ TestHint::TestHint(const String & query_)
size_t pos_end = comment.find('}', pos_start);
if (pos_end != String::npos)
{
String hint(comment.begin() + pos_start + 1, comment.begin() + pos_end);
parse(hint, is_leading_hint);
Lexer comment_lexer(comment.c_str() + pos_start + 1, comment.c_str() + pos_end, 0);
parse(comment_lexer, is_leading_hint);
}
}
}
@ -69,27 +52,30 @@ TestHint::TestHint(const String & query_)
}
}
void TestHint::parse(const String & hint, bool is_leading_hint)
bool TestHint::hasExpectedClientError(int error)
{
ReadBufferFromString in(hint);
String item;
return std::find(client_errors.begin(), client_errors.end(), error) != client_errors.end();
}
while (!in.eof())
bool TestHint::hasExpectedServerError(int error)
{
return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end();
}
void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
{
std::unordered_set<std::string_view> commands{"echo", "echoOn", "echoOff"};
std::unordered_set<std::string_view> command_errors{
"serverError",
"clientError",
};
for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken())
{
readStringUntilWhitespace(item, in);
if (in.eof())
break;
skipWhitespaceIfAny(in);
if (!is_leading_hint)
String item = String(token.begin, token.end);
if (token.type == TokenType::BareWord && commands.contains(item))
{
if (item == "serverError")
server_error = parseErrorCode(in);
else if (item == "clientError")
client_error = parseErrorCode(in);
}
if (item == "echo")
echo.emplace(true);
if (item == "echoOn")
@ -97,6 +83,56 @@ void TestHint::parse(const String & hint, bool is_leading_hint)
if (item == "echoOff")
echo.emplace(false);
}
else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item))
{
/// Everything after this must be a list of error codes separated by commas
ErrorVector error_codes;
while (!token.isEnd())
{
token = comment_lexer.nextToken();
if (token.type == TokenType::Whitespace)
continue;
if (token.type == TokenType::Number)
{
int code;
auto [p, ec] = std::from_chars(token.begin, token.end, code);
if (p == token.begin)
throw DB::Exception(
DB::ErrorCodes::CANNOT_PARSE_TEXT,
"Could not parse integer number for errorcode: {}",
std::string_view(token.begin, token.end));
error_codes.push_back(code);
}
else if (token.type == TokenType::BareWord)
{
int code = DB::ErrorCodes::getErrorCodeByName(std::string_view(token.begin, token.end));
error_codes.push_back(code);
}
else
throw DB::Exception(
DB::ErrorCodes::CANNOT_PARSE_TEXT,
"Could not parse error code in {}: {}",
getTokenName(token.type),
std::string_view(token.begin, token.end));
do
{
token = comment_lexer.nextToken();
} while (!token.isEnd() && token.type == TokenType::Whitespace);
if (!token.isEnd() && token.type != TokenType::Comma)
throw DB::Exception(
DB::ErrorCodes::CANNOT_PARSE_TEXT,
"Could not parse error code. Expected ','. Got '{}'",
std::string_view(token.begin, token.end));
}
if (item == "serverError")
server_errors = error_codes;
else
client_errors = error_codes;
break;
}
}
}
}

View File

@ -1,21 +1,30 @@
#pragma once
#include <optional>
#include <vector>
#include <fmt/format.h>
#include <Core/Types.h>
namespace DB
{
class Lexer;
/// Checks expected server and client error codes.
///
/// The following comment hints are supported:
///
/// - "-- { serverError 60 }" -- in case of you are expecting server error.
/// - "-- { serverError 16, 36 }" -- in case of you are expecting one of the 2 errors.
///
/// - "-- { clientError 20 }" -- in case of you are expecting client error.
/// - "-- { clientError 20, 60, 92 }" -- It's expected that the client will return one of the 3 errors.
///
/// - "-- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name.
/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS }" -- by error name.
///
/// - "-- { clientError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name.
///
@ -43,29 +52,73 @@ namespace DB
class TestHint
{
public:
using ErrorVector = std::vector<int>;
TestHint(const String & query_);
int serverError() const { return server_error; }
int clientError() const { return client_error; }
const auto & serverErrors() const { return server_errors; }
const auto & clientErrors() const { return client_errors; }
std::optional<bool> echoQueries() const { return echo; }
bool hasClientErrors() { return !client_errors.empty(); }
bool hasServerErrors() { return !server_errors.empty(); }
bool hasExpectedClientError(int error);
bool hasExpectedServerError(int error);
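/// A minimal usage sketch (hypothetical query text): for a TestHint built from
/// "SELECT 1; -- { serverError 60, 36 }", hasServerErrors() is true and
/// hasExpectedServerError(60) and hasExpectedServerError(36) both return true.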
private:
const String & query;
int server_error = 0;
int client_error = 0;
ErrorVector server_errors{};
ErrorVector client_errors{};
std::optional<bool> echo;
void parse(const String & hint, bool is_leading_hint);
void parse(Lexer & comment_lexer, bool is_leading_hint);
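/// True if every error that actually occurred is among the expected ones,
/// and no expected error list was left without a matching actual error.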
bool allErrorsExpected(int actual_server_error, int actual_client_error) const
{
return (server_error || client_error) && (server_error == actual_server_error) && (client_error == actual_client_error);
if (actual_server_error && std::find(server_errors.begin(), server_errors.end(), actual_server_error) == server_errors.end())
return false;
if (!actual_server_error && server_errors.size())
return false;
if (actual_client_error && std::find(client_errors.begin(), client_errors.end(), actual_client_error) == client_errors.end())
return false;
if (!actual_client_error && client_errors.size())
return false;
return true;
}
bool lostExpectedError(int actual_server_error, int actual_client_error) const
{
return (server_error && !actual_server_error) || (client_error && !actual_client_error);
return (server_errors.size() && !actual_server_error) || (client_errors.size() && !actual_client_error);
}
};
}
template <>
struct fmt::formatter<DB::TestHint::ErrorVector>
{
static constexpr auto parse(format_parse_context & ctx)
{
const auto * it = ctx.begin();
const auto * end = ctx.end();
/// Only support {}.
if (it != end && *it != '}')
throw format_error("Invalid format");
return it;
}
template <typename FormatContext>
auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx)
{
if (ErrorVector.empty())
return format_to(ctx.out(), "{}", 0);
else if (ErrorVector.size() == 1)
return format_to(ctx.out(), "{}", ErrorVector[0]);
else
return format_to(ctx.out(), "[{}]", fmt::join(ErrorVector, ", "));
}
};
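/// A minimal usage sketch: fmt::format("{}", DB::TestHint::ErrorVector{}) yields "0",
/// a single code is printed as-is, and fmt::format("{}", DB::TestHint::ErrorVector{60, 36}) yields "[60, 36]".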

View File

@ -220,7 +220,12 @@ public:
double getRatioOfDefaultRows(double) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for ColumnAggregateFunction");
return 0.0;
}
UInt64 getNumberOfDefaultRows() const override
{
return 0;
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override

View File

@ -953,6 +953,11 @@ double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const
return getRatioOfDefaultRowsImpl<ColumnArray>(sample_ratio);
}
UInt64 ColumnArray::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnArray>();
}
void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnArray>(indices, from, limit);

View File

@ -173,6 +173,7 @@ public:
}
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;

View File

@ -117,6 +117,7 @@ public:
void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); }
size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); }
double getRatioOfDefaultRows(double) const override { throwMustBeDecompressed(); }
UInt64 getNumberOfDefaultRows() const override { throwMustBeDecompressed(); }
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); }
protected:

View File

@ -253,6 +253,11 @@ public:
return data->isDefaultAt(0) ? 1.0 : 0.0;
}
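/// A constant column is either entirely default or has no default rows, depending on its single value.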
UInt64 getNumberOfDefaultRows() const override
{
return data->isDefaultAt(0) ? s : 0;
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
if (!data->isDefaultAt(0))

View File

@ -136,6 +136,11 @@ public:
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);

View File

@ -200,6 +200,11 @@ public:
return getRatioOfDefaultRowsImpl<ColumnFixedString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnFixedString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnFixedString>(indices, from, limit);

View File

@ -168,6 +168,11 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName());
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName());
}
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName());

View File

@ -199,6 +199,11 @@ public:
return getIndexes().getRatioOfDefaultRows(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getIndexes().getNumberOfDefaultRows();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndexes().getIndicesOfNonDefaultRows(indices, from, limit);

View File

@ -296,6 +296,11 @@ double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const
return getRatioOfDefaultRowsImpl<ColumnMap>(sample_ratio);
}
UInt64 ColumnMap::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnMap>();
}
void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnMap>(indices, from, limit);

View File

@ -92,6 +92,7 @@ public:
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override { nested->finalize(); }
bool isFinalized() const override { return nested->isFinalized(); }

View File

@ -156,6 +156,11 @@ public:
return getRatioOfDefaultRowsImpl<ColumnNullable>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnNullable>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);

View File

@ -254,6 +254,7 @@ public:
bool hasEqualValues() const override { throwMustBeConcrete(); }
size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
double getRatioOfDefaultRows(double) const override { throwMustBeConcrete(); }
UInt64 getNumberOfDefaultRows() const override { throwMustBeConcrete(); }
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeConcrete(); }
private:

View File

@ -450,11 +450,14 @@ void ColumnSparse::compareColumn(const IColumn & rhs, size_t rhs_row_num,
{
const auto & rhs_sparse = assert_cast<const ColumnSparse &>(rhs);
PaddedPODArray<Int8> nested_result;
values->compareColumn(rhs_sparse.getValuesColumn(), rhs_sparse.getValueIndex(rhs_row_num),
values->compareColumn(
rhs_sparse.getValuesColumn(),
rhs_sparse.getValueIndex(rhs_row_num),
nullptr, nested_result, direction, nan_direction_hint);
const auto & offsets_data = getOffsetsData();
compare_results.resize_fill(_size, nested_result[0]);
compare_results.resize(size());
std::fill(compare_results.begin(), compare_results.end(), nested_result[0]);
for (size_t i = 0; i < offsets_data.size(); ++i)
compare_results[offsets_data[i]] = nested_result[i + 1];
}
@ -470,7 +473,7 @@ int ColumnSparse::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs
bool ColumnSparse::hasEqualValues() const
{
size_t num_defaults = getNumberOfDefaults();
size_t num_defaults = getNumberOfDefaultRows();
if (num_defaults == _size)
return true;
@ -512,7 +515,7 @@ void ColumnSparse::getPermutationImpl(IColumn::PermutationSortDirection directio
else
values->getPermutation(direction, stability, limit + 1, null_direction_hint, perm);
size_t num_of_defaults = getNumberOfDefaults();
size_t num_of_defaults = getNumberOfDefaultRows();
size_t row = 0;
const auto & offsets_data = getOffsetsData();
@ -677,7 +680,7 @@ void ColumnSparse::getExtremes(Field & min, Field & max) const
return;
}
if (getNumberOfDefaults() == 0)
if (getNumberOfDefaultRows() == 0)
{
size_t min_idx = 1;
size_t max_idx = 1;
@ -709,7 +712,12 @@ void ColumnSparse::getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t
double ColumnSparse::getRatioOfDefaultRows(double) const
{
return static_cast<double>(getNumberOfDefaults()) / _size;
return static_cast<double>(getNumberOfDefaultRows()) / _size;
}
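/// In a sparse column only the non-default values are materialized and tracked in `offsets`,
/// so the number of default rows is the total size minus the number of stored non-default rows.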
UInt64 ColumnSparse::getNumberOfDefaultRows() const
{
return _size - offsets->size();
}
MutableColumns ColumnSparse::scatter(ColumnIndex num_columns, const Selector & selector) const

View File

@ -132,6 +132,7 @@ public:
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
@ -150,7 +151,6 @@ public:
size_t sizeOfValueIfFixed() const override { return values->sizeOfValueIfFixed() + values->sizeOfValueIfFixed(); }
bool isCollationSupported() const override { return values->isCollationSupported(); }
size_t getNumberOfDefaults() const { return _size - offsets->size(); }
size_t getNumberOfTrailingDefaults() const
{
return offsets->empty() ? _size : _size - getOffsetsData().back() - 1;

View File

@ -187,8 +187,8 @@ public:
void updateHashFast(SipHash & hash) const override
{
hash.update(reinterpret_cast<const char *>(offsets.data()), size() * sizeof(offsets[0]));
hash.update(reinterpret_cast<const char *>(chars.data()), size() * sizeof(chars[0]));
hash.update(reinterpret_cast<const char *>(offsets.data()), offsets.size() * sizeof(offsets[0]));
hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
}
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -273,6 +273,11 @@ public:
return getRatioOfDefaultRowsImpl<ColumnString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnString>(indices, from, limit);

View File

@ -565,6 +565,11 @@ double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const
return getRatioOfDefaultRowsImpl<ColumnTuple>(sample_ratio);
}
UInt64 ColumnTuple::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnTuple>();
}
void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);

View File

@ -102,6 +102,7 @@ public:
bool isCollationSupported() const override;
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override;
bool isFinalized() const override;

View File

@ -146,6 +146,11 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getRatioOfDefaultRows' not implemented for ColumnUnique");
}
UInt64 getNumberOfDefaultRows() const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getNumberOfDefaultRows' not implemented for ColumnUnique");
}
void getIndicesOfNonDefaultRows(IColumn::Offsets &, size_t, size_t) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getIndicesOfNonDefaultRows' not implemented for ColumnUnique");

View File

@ -358,6 +358,11 @@ public:
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);

Some files were not shown because too many files have changed in this diff.