diff --git a/.github/ISSUE_TEMPLATE/96_installation-issues.md b/.github/ISSUE_TEMPLATE/96_installation-issues.md index c322ccc92ce..e4be8af86b6 100644 --- a/.github/ISSUE_TEMPLATE/96_installation-issues.md +++ b/.github/ISSUE_TEMPLATE/96_installation-issues.md @@ -7,6 +7,8 @@ assignees: '' --- +**I have tried the following solutions**: https://clickhouse.com/docs/en/faq/troubleshooting/#troubleshooting-installation-errors + **Installation type** Packages, docker, single binary, curl? diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 30a77a9b27f..c90df6e57b7 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -145,8 +145,8 @@ jobs: fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -190,8 +190,8 @@ jobs: fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -233,8 +233,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -276,8 +276,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -319,8 +319,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -364,8 +364,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -409,8 +409,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C 
"$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index da84500559a..f3d672136ef 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -209,8 +209,8 @@ jobs: fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -251,8 +251,8 @@ jobs: fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -295,8 +295,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -338,8 +338,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -381,8 +381,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -424,8 +424,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -467,8 +467,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update 
--single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -510,8 +510,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -556,8 +556,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -599,8 +599,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -644,8 +644,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -689,8 +689,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -734,8 +734,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -779,8 +779,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -824,8 +824,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 
--recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -869,8 +869,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -914,8 +914,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -3011,6 +3011,150 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + PerformanceComparisonAarch-0: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/performance_comparison + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Performance Comparison Aarch64 + REPO_COPY=${{runner.temp}}/performance_comparison/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Performance Comparison + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 performance_comparison_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + PerformanceComparisonAarch-1: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/performance_comparison + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Performance Comparison Aarch64 + REPO_COPY=${{runner.temp}}/performance_comparison/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Performance Comparison + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 performance_comparison_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs 
--no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + PerformanceComparisonAarch-2: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/performance_comparison + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Performance Comparison Aarch64 + REPO_COPY=${{runner.temp}}/performance_comparison/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Performance Comparison + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 performance_comparison_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + PerformanceComparisonAarch-3: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/performance_comparison + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Performance Comparison Aarch64 + REPO_COPY=${{runner.temp}}/performance_comparison/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Performance Comparison + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 performance_comparison_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ###################################### SQLANCER FUZZERS ###################################### ############################################################################################## diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7dff1e205a1..9ebbe4e090d 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -105,7 +105,7 @@ jobs: - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d3580f4c4d7..257dea65693 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -272,8 +272,8 @@ jobs: fetch-depth: 0 # for performance artifact - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C 
"$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -315,8 +315,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -360,8 +360,8 @@ jobs: fetch-depth: 0 # for performance artifact - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -403,8 +403,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -446,8 +446,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -489,8 +489,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -532,8 +532,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -575,8 +575,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -621,8 +621,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule 
update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -664,8 +664,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -707,8 +707,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -750,8 +750,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -793,8 +793,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -836,8 +836,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -879,8 +879,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -922,8 +922,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -965,8 +965,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p 
"$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -1157,7 +1157,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1194,7 +1194,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseDatabaseReplicated2: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_database_replicated + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, DatabaseReplicated) + REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseDatabaseReplicated3: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_database_replicated + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, DatabaseReplicated) + REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1253,7 +1327,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - FunctionalStatelessTestReleaseS3: + FunctionalStatelessTestReleaseS3_0: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] steps: @@ -1265,6 +1339,45 @@ jobs: CHECK_NAME=Stateless tests (release, s3 storage) REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: 
Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseS3_1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1301,7 +1414,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1338,7 +1451,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1375,7 +1488,118 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug3: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm 
-f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug4: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug5: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1412,7 +1636,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1449,7 +1673,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1486,7 +1710,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Tsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: 
actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Tsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1558,7 +1856,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1595,7 +1893,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (asan) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + 
TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (asan) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1632,7 +2004,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1669,7 +2041,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1706,7 +2078,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1730,7 +2102,81 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - FunctionalStatelessTestUBsan: + FunctionalStatelessTestTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestTsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestUBsan0: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] steps: @@ -1742,6 +2188,45 @@ jobs: 
CHECK_NAME=Stateless tests (ubsan) REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestUBsan1: + needs: [BuilderDebUBsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_ubsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (ubsan) + REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1778,7 +2263,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1815,7 +2300,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1852,7 +2337,118 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan3: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" 
"$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan4: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan5: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1889,7 +2485,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1926,7 +2522,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1963,7 +2559,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestDebug3: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - 
name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestDebug4: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2681,7 +3351,7 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2717,7 +3387,7 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2753,7 +3423,115 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps 
--all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2789,7 +3567,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2825,7 +3603,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2861,7 +3639,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2897,7 +3675,79 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=3 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsTsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration 
tests (tsan) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsTsan5: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (tsan) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2933,7 +3783,7 @@ jobs: CHECK_NAME=Integration tests (release) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2969,7 +3819,79 @@ jobs: CHECK_NAME=Integration tests (release) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsRelease2: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsRelease3: + needs: 
[BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -3584,20 +4506,32 @@ jobs: - FunctionalStatelessTestDebug0 - FunctionalStatelessTestDebug1 - FunctionalStatelessTestDebug2 + - FunctionalStatelessTestDebug3 + - FunctionalStatelessTestDebug4 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseReplicated0 - FunctionalStatelessTestReleaseDatabaseReplicated1 + - FunctionalStatelessTestReleaseDatabaseReplicated2 + - FunctionalStatelessTestReleaseDatabaseReplicated3 - FunctionalStatelessTestReleaseWideParts - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 + - FunctionalStatelessTestAsan2 + - FunctionalStatelessTestAsan3 - FunctionalStatelessTestTsan0 - FunctionalStatelessTestTsan1 - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestTsan3 + - FunctionalStatelessTestTsan4 - FunctionalStatelessTestMsan0 - FunctionalStatelessTestMsan1 - FunctionalStatelessTestMsan2 - - FunctionalStatelessTestUBsan + - FunctionalStatelessTestMsan3 + - FunctionalStatelessTestMsan4 + - FunctionalStatelessTestMsan5 + - FunctionalStatelessTestUBsan0 + - FunctionalStatelessTestUBsan1 - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease - FunctionalStatefulTestAarch64 @@ -3605,13 +4539,17 @@ jobs: - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan - FunctionalStatefulTestUBsan - - FunctionalStatelessTestReleaseS3 + - FunctionalStatelessTestReleaseS3_0 + - FunctionalStatelessTestReleaseS3_1 - FunctionalStatelessTestS3Debug0 - FunctionalStatelessTestS3Debug1 - FunctionalStatelessTestS3Debug2 + - FunctionalStatelessTestS3Debug4 + - FunctionalStatelessTestS3Debug5 - FunctionalStatelessTestS3Tsan0 - FunctionalStatelessTestS3Tsan1 - FunctionalStatelessTestS3Tsan2 + - FunctionalStatelessTestS3Tsan4 - StressTestDebug - StressTestAsan - StressTestTsan @@ -3625,12 +4563,19 @@ jobs: - IntegrationTestsAsan0 - IntegrationTestsAsan1 - IntegrationTestsAsan2 + - IntegrationTestsAsan3 + - IntegrationTestsAsan4 + - IntegrationTestsAsan5 - IntegrationTestsRelease0 - IntegrationTestsRelease1 + - IntegrationTestsRelease2 + - IntegrationTestsRelease3 - IntegrationTestsTsan0 - IntegrationTestsTsan1 - IntegrationTestsTsan2 - IntegrationTestsTsan3 + - IntegrationTestsTsan4 + - IntegrationTestsTsan5 - PerformanceComparisonX86-0 - PerformanceComparisonX86-1 - PerformanceComparisonX86-2 diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 8148905cec7..bf35ca76fc6 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -136,8 +136,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -178,8 +178,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: 
Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -220,8 +220,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -263,8 +263,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -306,8 +306,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -349,8 +349,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -392,8 +392,8 @@ jobs: uses: actions/checkout@v2 - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -437,8 +437,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -482,8 +482,8 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + git -C "$GITHUB_WORKSPACE" submodule sync + git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" diff --git a/.github/workflows/tags_stable.yml 
b/.github/workflows/tags_stable.yml index a9172a8a2e2..e03e5c543c2 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -38,7 +38,7 @@ jobs: with: ref: master fetch-depth: 0 - - name: Generate versions + - name: Update versions, docker version, changelog, security env: GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} run: | @@ -51,6 +51,7 @@ jobs: --gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \ --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}" git add "./docs/changelogs/${GITHUB_TAG}.md" + python ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - name: Create Pull Request uses: peter-evans/create-pull-request@v3 diff --git a/.gitmodules b/.gitmodules index 293029ad171..618cfe6e76b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -290,3 +290,6 @@ [submodule "contrib/morton-nd"] path = contrib/morton-nd url = https://github.com/morton-nd/morton-nd +[submodule "contrib/xxHash"] + path = contrib/xxHash + url = https://github.com/Cyan4973/xxHash.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 68767612892..0e41894b8bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v22.11, 2022-11-17](#2211)**
**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
@@ -11,6 +12,109 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+### ClickHouse release 22.11, 2022-11-17 + +#### Backward Incompatible Change +* `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). + +#### New Feature +* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). This is controlled by the `insert_keeper_max_retries` setting, which is disabled by default. [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). +* Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). +* Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). +* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In the case where the types of Intervals are different, they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `**` glob support for recursive directory traversal of the filesystem and S3. Resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Introduce `s3_plain` disk type for write-once-read-many operations. Implement `ATTACH` of `MergeTree` table for `s3_plain` disk. [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). +* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). +* Add function `ascii` like in Apache Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). +* Add function `positive_modulo` (`pmod`) which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). +* Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). +* Add function `randCanonical`, which is similar to the `rand` function in Apache Spark or Impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). 
[#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). +* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). +* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). +* Add generic implementation for arbitrary structured named collections, access type and `system.named_collections`. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Performance Improvement +* Parallelized merging of `uniqExact` states for aggregation without key, i.e. queries like `SELECT uniqExact(number) FROM table`. The improvement becomes noticeable when the number of unique keys approaches 10^6. Also `uniq` performance is slightly optimized. [#43072](https://github.com/ClickHouse/ClickHouse/pull/43072) ([Nikita Taranov](https://github.com/nickitat)). +* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). +* Speed up AND and OR operators when they are sequenced. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Support parallel parsing for `LineAsString` input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). +* ClickHouse Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). +* A condition like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). + +#### Experimental Feature +* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). +* Ignore MySQL binlog SAVEPOINT event for MaterializedMySQL. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). + +#### Improvement +* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so that the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). +* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode. 
Resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). +* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). +* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to STDERR. It allows getting progress even if STDERR is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for `FixedString` input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). +* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). +* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). +* Refactor function `tokens` to enable max tokens returned for related functions (disabled by default). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). +* Allow to use `Date32` arguments for `formatDateTime` and `FROM_UNIXTIME` functions. 
[#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). +* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use the previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase the upload part size exponentially for backups to S3 to avoid errors about the 10,000-part limit of S3 multipart uploads. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). +* Previously, when merges were continuously busy and disk space was low, completely TTL-expired parts could not be selected and dropped, which made the shortage of disk space even worse. Now dropping a part that has fully expired does not require reserving additional disk space, so TTL cleanup can proceed normally. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). +* Add the `oss` function and the `OSS` table engine (convenient for users); OSS is fully compatible with S3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify the `INFORMATION_SCHEMA` tables so that ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). This improves compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add some functions for compatibility with PowerBI when it connects using the MySQL protocol. [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Better usability for the Dashboard on changes. [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). + +#### Build/Testing/Packaging Improvement +* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an open-source fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). +* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). +* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. 
This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). +* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove some libraries from Ubuntu Docker image. [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Updated normaliser to clone the alias ast. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix race for backup of tables in `Lazy` databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix for `skip_unavailable_shards`: it did not work with the `s3Cluster` table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). +* Fix schema inference in `s3Cluster` and improvement in `hdfsCluster`. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix retries while reading from URL table engines / table function. (retriable errors could be retries more times than needed, non-retriable errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A segmentation fault related to DNS & c-ares has been reported and fixed. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). +* Fix typo in a setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix creating a Set with wrong header when data type is LowCardinality. 
Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). +* `(U)Int128` and `(U)Int256` values are now correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a bug in the functions parser that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix the locking in `TRUNCATE TABLE`. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). +* Fix a possible crash in `web` disks when a file does not exist (`OPTIMIZE TABLE FINAL` could eventually hit the same error). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix `auth_type` mapping in `system.session_log` by including `SSL_CERTIFICATE` in the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). +* Fix stack-use-after-return under ASAN build in the Create User query parser. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `lowerUTF8`/`upperUTF8` when a symbol crosses a 16-byte boundary (a very frequent case if your strings are longer than 16 bytes). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). +* An additional bound check was added to the LZ4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). +* Fix a rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect behavior with multiple disjuncts in hash join, closes [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). +* Fix a null pointer dereference when selecting `if` with an alias from a three-table join. [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix a memory sanitizer report in Cluster Discovery, closes [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). +* Improve DateTime schema inference in the case of an empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a rare NOT_FOUND_COLUMN_IN_BLOCK error when a projection could be used but no projection is available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Fix ATTACH TABLE in the `PostgreSQL` database engine if the table contains the DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix lambda parsing. 
Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). +* Fix several buffer over-reads in deserialization of carefully crafted aggregate function states. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Fix function `if` in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix decimal math overflow in parsing DateTime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). +* The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed unexpected behaviour of `Interval` types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). + ### ClickHouse release 22.10, 2022-10-26 #### Backward Incompatible Change @@ -570,7 +674,7 @@ * Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). * Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). * Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). -* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). +* Remove subprocess run for Kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). * * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). * Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). * Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). 
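To make the 22.11 additions above more concrete, here is a minimal, hypothetical sketch of how some of the new functions and syntax could be exercised; the result comments are approximate, and the `hits` table (ordered by `url`) is an assumption made only for the index examples:

```sql
-- New scalar functions added in 22.11 (result comments are approximate)
SELECT positive_modulo(-7, 3);              -- 2: the result is always non-negative, unlike -7 % 3
SELECT formatReadableDecimalSize(1500000);  -- '1.50 MB' (decimal units rather than MiB)
SELECT ascii('A');                          -- 65, as in Apache Spark
SELECT randCanonical();                     -- pseudo-random Float64 in [0, 1)

-- Composite intervals can now be parsed and added to Date/DateTime values
SELECT now() + INTERVAL '1 HOUR 1 MINUTE 1 SECOND';

-- Date32 arguments are now accepted by formatDateTime
SELECT formatDateTime(toDate32('1900-01-01'), '%Y-%m-%d');

-- Prefix-anchored conditions can use the primary index (assumes a hypothetical table `hits` ordered by `url`)
SELECT count() FROM hits WHERE match(url, '^https://example\\.com/');
SELECT count() FROM hits WHERE url NOT LIKE 'https://example.com/%';
```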
@@ -850,8 +954,8 @@ #### Upgrade Notes -* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. +* Now, background merges, mutations, and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes to the metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant in this area. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. * `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. * Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/CMakeLists.txt b/CMakeLists.txt index d10bc63c15e..06e6f943fd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -442,8 +442,9 @@ elseif (OS_DARWIN) include(cmake/darwin/default_libs.cmake) elseif (OS_FREEBSD) include(cmake/freebsd/default_libs.cmake) +else() + link_libraries(global-group) endif () -link_libraries(global-group) if (NOT (OS_LINUX OR OS_DARWIN)) # Using system libs can cause a lot of warnings in includes (on macro expansion). 
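For readers affected by the upgrade notes quoted above, the previous behaviour can be restored per session; a minimal sketch, with values that are only examples:

```sql
-- Reinstate a per-query memory limit comparable to the old 10 GB default
SET max_memory_usage = 10000000000;

-- Re-enable per-thread statistics in system.query_thread_log
SET log_query_threads = 1;
```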
@@ -592,7 +593,7 @@ add_subdirectory (programs) add_subdirectory (tests) add_subdirectory (utils) -include (cmake/sanitize_target_link_libraries.cmake) +include (cmake/sanitize_targets.cmake) # Build native targets if necessary get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS) diff --git a/README.md b/README.md index f90df9686c2..4f2483097d6 100644 --- a/README.md +++ b/README.md @@ -17,5 +17,7 @@ ClickHouse® is an open-source column-oriented database management system that a ## Upcoming events * [**v22.11 Release Webinar**](https://clickhouse.com/company/events/v22-11-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. -* [**ClickHouse Meetup at the Deutsche Bank office in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/289311596/) Hear from Deutsche Bank on why they chose ClickHouse for big sensitive data in a regulated environment. The ClickHouse team will then present how ClickHouse is used for real time financial data analytics, including tick data, trade analytics and risk management. -* [**AWS re:Invent**](https://clickhouse.com/company/events/aws-reinvent) Core members of the ClickHouse team -- including 2 of our founders -- will be at re:Invent from November 29 to December 3. We are available on the show floor, but are also determining interest in holding an event during the time there. +* [**ClickHouse Meetup at the RELEX Solutions office in Stockholm**](https://www.meetup.com/clickhouse-stockholm-user-group/events/289492084/) - Dec 1 - Formulate by RELEX is a Swedish promotion planning and analytics company. They will share why they chose ClickHouse for their real time analytics and forecasting solution. The ClickHouse team will then present how ClickHouse is used for real time financial data analytics, including tick data, trade analytics and risk management. +* [**ClickHouse Meetup at the Deutsche Bank office in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/289311596/) - Dec 5 - Hear from Deutsche Bank on why they chose ClickHouse for big sensitive data in a regulated environment. The ClickHouse team will then present how ClickHouse is used for real time financial data analytics, including tick data, trade analytics and risk management. +* [**ClickHouse Meetup at the Rokt offices in Manhattan**](https://www.meetup.com/clickhouse-new-york-user-group/events/289403909/) - Dec 6 - We are very excited to be holding our next in-person ClickHouse meetup at the Rokt offices in Manhattan. Featuring talks from Bloomberg, Disney Streaming, Prequel, Rokt, and ClickHouse. + diff --git a/SECURITY.md b/SECURITY.md index 0fb333c8ea3..a4f431d7552 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,3 +1,6 @@ + # Security Policy @@ -10,6 +13,7 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 22.11 | ✔️ | | 22.10 | ✔️ | | 22.9 | ✔️ | | 22.8 | ✔️ | @@ -61,5 +65,5 @@ As the security issue moves from triage, to identified fix, to release planning ## Public Disclosure Timing -A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. 
The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. +A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. diff --git a/base/base/bit_cast.h b/base/base/bit_cast.h index b2b6915764d..8198991e98e 100644 --- a/base/base/bit_cast.h +++ b/base/base/bit_cast.h @@ -12,7 +12,21 @@ template std::decay_t bit_cast(const From & from) { + /** + * Assume the source value is 0xAABBCCDD (i.e. sizeof(from) == 4). + * Its BE representation is 0xAABBCCDD, the LE representation is 0xDDCCBBAA. + * Further assume, sizeof(res) == 8 and that res is initially zeroed out. + * With LE, the result after bit_cast will be 0xDDCCBBAA00000000 --> input value == output value. + * With BE, the result after bit_cast will be 0x00000000AABBCCDD --> input value == output value. + */ To res {}; - memcpy(static_cast(&res), &from, std::min(sizeof(res), sizeof(from))); + if constexpr (std::endian::native == std::endian::little) + memcpy(static_cast(&res), &from, std::min(sizeof(res), sizeof(from))); + else + { + uint32_t offset_to = (sizeof(res) > sizeof(from)) ? (sizeof(res) - sizeof(from)) : 0; + uint32_t offset_from = (sizeof(from) > sizeof(res)) ? (sizeof(from) - sizeof(res)) : 0; + memcpy(reinterpret_cast(&res) + offset_to, reinterpret_cast(&from) + offset_from, std::min(sizeof(res), sizeof(from))); + } return res; } diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index d10bc6ba723..bae03ad590a 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -220,13 +220,13 @@ struct statx { uint32_t stx_dev_minor; uint64_t spare[14]; }; -#endif int statx(int fd, const char *restrict path, int flag, unsigned int mask, struct statx *restrict statxbuf) { return syscall(SYS_statx, fd, path, flag, mask, statxbuf); } +#endif #include diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 11b37f5a7c8..d06d3918612 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54468) +SET(VERSION_REVISION 54469) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 11) +SET(VERSION_MINOR 12) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 98ab5a3c189232ea2a3dddb9d2be7196ae8b3434) -SET(VERSION_DESCRIBE v22.11.1.1-testing) -SET(VERSION_STRING 22.11.1.1) +SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77) +SET(VERSION_DESCRIBE v22.12.1.1-testing) +SET(VERSION_STRING 22.12.1.1) # end of autochange diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 1f92663a4b9..3e6e4907a71 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -23,6 +23,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) include (cmake/cxx.cmake) +link_libraries(global-group) target_link_libraries(global-group INTERFACE $ diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 65d5f0511d9..3e1f22ef2e4 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -24,6 +24,7 @@ find_package(Threads REQUIRED) include (cmake/unwind.cmake) include (cmake/cxx.cmake) +link_libraries(global-group) target_link_libraries(global-group INTERFACE $ diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 21bead7020c..23c5fc3e14f 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -34,6 +34,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +include (cmake/unwind.cmake) +include (cmake/cxx.cmake) + +# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies +# which are ok with static libraries but not with dynamic ones +link_libraries(global-group) + if (NOT OS_ANDROID) if (NOT USE_MUSL) # Our compatibility layer doesn't build under Android, many errors in musl. @@ -42,9 +49,6 @@ if (NOT OS_ANDROID) add_subdirectory(base/harmful) endif () -include (cmake/unwind.cmake) -include (cmake/cxx.cmake) - target_link_libraries(global-group INTERFACE -Wl,--start-group $ diff --git a/cmake/sanitize_target_link_libraries.cmake b/cmake/sanitize_targets.cmake similarity index 65% rename from cmake/sanitize_target_link_libraries.cmake rename to cmake/sanitize_targets.cmake index d66ea338a52..8f61da2009d 100644 --- a/cmake/sanitize_target_link_libraries.cmake +++ b/cmake/sanitize_targets.cmake @@ -1,3 +1,13 @@ +# https://stackoverflow.com/a/62311397/328260 +macro (get_all_targets_recursive targets dir) + get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach (subdir ${subdirectories}) + get_all_targets_recursive (${targets} ${subdir}) + endforeach () + get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + list (APPEND ${targets} ${current_targets}) +endmacro () + # When you will try to link target with the directory (that exists), cmake will # skip this without an error, only the following warning will be reported: # @@ -18,23 +28,12 @@ # -- but cannot be used with link_libraries() # - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize # -- this will work. 
- -# https://stackoverflow.com/a/62311397/328260 function (get_all_targets var) set (targets) get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}) set (${var} ${targets} PARENT_SCOPE) endfunction() -macro (get_all_targets_recursive targets dir) - get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) - foreach (subdir ${subdirectories}) - get_all_targets_recursive (${targets} ${subdir}) - endforeach () - get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) - list (APPEND ${targets} ${current_targets}) -endmacro () - -macro (sanitize_link_libraries target) +function (sanitize_link_libraries target) get_target_property(target_type ${target} TYPE) if (${target_type} STREQUAL "INTERFACE_LIBRARY") get_property(linked_libraries TARGET ${target} PROPERTY INTERFACE_LINK_LIBRARIES) @@ -48,9 +47,35 @@ macro (sanitize_link_libraries target) message(FATAL_ERROR "${target} requested to link with directory: ${linked_library}") endif() endforeach() -endmacro() - +endfunction() get_all_targets (all_targets) foreach (target ${all_targets}) sanitize_link_libraries(${target}) endforeach() + +# +# Do not allow to define -W* from contrib publically (INTERFACE/PUBLIC). +# +function (get_contrib_targets var) + set (targets) + get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}/contrib) + set (${var} ${targets} PARENT_SCOPE) +endfunction() +function (sanitize_interface_flags target) + get_target_property(target_type ${target} TYPE) + get_property(compile_definitions TARGET ${target} PROPERTY INTERFACE_COMPILE_DEFINITIONS) + get_property(compile_options TARGET ${target} PROPERTY INTERFACE_COMPILE_OPTIONS) + if (NOT "${compile_options}" STREQUAL "") + message(FATAL_ERROR "${target} set INTERFACE_COMPILE_OPTIONS to ${compile_options}. This is forbidden.") + endif() + if ("${compile_definitions}" MATCHES "-Wl,") + # linker option - OK + elseif ("${compile_definitions}" MATCHES "-W") + message(FATAL_ERROR "${target} contains ${compile_definitions} flags in INTERFACE_COMPILE_DEFINITIONS. This is forbidden.") + endif() +endfunction() +get_contrib_targets (contrib_targets) +foreach (contrib_target ${contrib_targets}) + sanitize_interface_flags(${contrib_target}) +endforeach() + diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 8ebd4ab55d3..ec7382846c2 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -167,7 +167,9 @@ add_contrib (c-ares-cmake c-ares) add_contrib (qpl-cmake qpl) add_contrib (morton-nd-cmake morton-nd) -add_contrib(annoy-cmake annoy) +add_contrib (annoy-cmake annoy) + +add_contrib (xxHash-cmake xxHash) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 53c6ff58f83..8dc154e9d91 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -57,7 +57,7 @@ add_library(cxx ${SRCS}) set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") target_include_directories(cxx SYSTEM BEFORE PRIVATE $) -target_include_directories(cxx SYSTEM BEFORE PUBLIC $) +target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$:$>) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. 
diff --git a/contrib/xxHash b/contrib/xxHash new file mode 160000 index 00000000000..3078dc6039f --- /dev/null +++ b/contrib/xxHash @@ -0,0 +1 @@ +Subproject commit 3078dc6039f8c0bffcb1904f81cfe6b2c3209435 diff --git a/contrib/xxHash-cmake/CMakeLists.txt b/contrib/xxHash-cmake/CMakeLists.txt new file mode 100644 index 00000000000..314094e9523 --- /dev/null +++ b/contrib/xxHash-cmake/CMakeLists.txt @@ -0,0 +1,13 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/xxHash") +set (SRCS + "${LIBRARY_DIR}/xxhash.c" +) + +add_library(xxHash ${SRCS}) +target_include_directories(xxHash SYSTEM BEFORE INTERFACE "${LIBRARY_DIR}") + +# XXH_INLINE_ALL - Make all functions inline, with implementations being directly included within xxhash.h. Inlining functions is beneficial for speed on small keys. +# https://github.com/Cyan4973/xxHash/tree/v0.8.1#build-modifiers +target_compile_definitions(xxHash PUBLIC XXH_INLINE_ALL) + +add_library(ch_contrib::xxHash ALIAS xxHash) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 06c3c0d80f0..b3da09facda 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -6,29 +6,24 @@ FROM clickhouse/test-util:$FROM_TAG # Rust toolchain and libraries ENV RUSTUP_HOME=/rust/rustup ENV CARGO_HOME=/rust/cargo -RUN curl https://sh.rustup.rs -sSf | bash -s -- -y -RUN chmod 777 -R /rust ENV PATH="/rust/cargo/env:${PATH}" ENV PATH="/rust/cargo/bin:${PATH}" -RUN rustup target add aarch64-unknown-linux-gnu && \ - rustup target add x86_64-apple-darwin && \ - rustup target add x86_64-unknown-freebsd && \ - rustup target add aarch64-apple-darwin && \ - rustup target add powerpc64le-unknown-linux-gnu -RUN apt-get install \ +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ + chmod 777 -R /rust && \ + rustup target add aarch64-unknown-linux-gnu && \ + rustup target add x86_64-apple-darwin && \ + rustup target add x86_64-unknown-freebsd && \ + rustup target add aarch64-apple-darwin && \ + rustup target add powerpc64le-unknown-linux-gnu + +RUN apt-get update && \ + apt-get install --yes \ gcc-aarch64-linux-gnu \ build-essential \ libc6 \ libc6-dev \ - libc6-dev-arm64-cross \ - --yes - -# Install CMake 3.20+ for Rust compilation -# Used https://askubuntu.com/a/1157132 as reference -RUN apt purge cmake --yes -RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null -RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main' -RUN apt update && apt install cmake --yes + libc6-dev-arm64-cross && \ + apt-get clean ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 8f1cf6ee98b..b717cec2d33 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.10.2.11" +ARG VERSION="22.11.1.1360" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index f50160321e1..8a5dc04681e 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.10.2.11" +ARG VERSION="22.11.1.1360" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index de9125d565b..b4d3405bfd9 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -137,6 +137,7 @@ function clone_submodules contrib/hashidsxx contrib/c-ares contrib/morton-nd + contrib/xxHash ) git submodule sync diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index eb4b09c173f..aa71074c02a 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -38,7 +38,7 @@ COPY * / SHELL ["/bin/bash", "-c"] CMD set -o pipefail \ && cd /workspace \ - && /run-fuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log + && timeout -s 9 1h /run-fuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log # docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/fuzzer diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index dbb56b258ed..bd539ca978b 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031 +# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031,SC2010,SC2015 set -x @@ -10,11 +10,6 @@ set -e set -u set -o pipefail -trap "exit" INT TERM -# The watchdog is in the separate process group, so we have to kill it separately -# if the script terminates earlier. -trap 'kill $(jobs -pr) ${watchdog_pid:-} ||:' EXIT - stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" @@ -110,26 +105,6 @@ function configure EOL } -function watchdog -{ - sleep 1800 - - echo "Fuzzing run has timed out" - for _ in {1..10} - do - # Only kill by pid the particular client that runs the fuzzing, or else - # we can kill some clickhouse-client processes this script starts later, - # e.g. for checking server liveness. - if ! kill $fuzzer_pid - then - break - fi - sleep 1 - done - - kill -9 -- $fuzzer_pid ||: -} - function filter_exists_and_template { local path @@ -175,8 +150,6 @@ function fuzz mkdir -p /var/run/clickhouse-server - # interferes with gdb - export CLICKHOUSE_WATCHDOG_ENABLE=0 # NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db 2>&1 | pigz > server.log.gz & server_pid=$! 
@@ -214,7 +187,7 @@ detach quit " > script.gdb - gdb -batch -command script.gdb -p $server_pid & + gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" & sleep 5 # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: @@ -236,7 +209,7 @@ quit # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric. # SC2046: Quote this to prevent word splitting. Actually I need word splitting. # shellcheck disable=SC2012,SC2046 - clickhouse-client \ + timeout -s TERM --preserve-status 30m clickhouse-client \ --receive_timeout=10 \ --receive_data_timeout_ms=10000 \ --stacktrace \ @@ -249,16 +222,6 @@ quit fuzzer_pid=$! echo "Fuzzer pid is $fuzzer_pid" - # Start a watchdog that should kill the fuzzer on timeout. - # The shell won't kill the child sleep when we kill it, so we have to put it - # into a separate process group so that we can kill them all. - set -m - watchdog & - watchdog_pid=$! - set +m - # Check that the watchdog has started. - kill -0 $watchdog_pid - # Wait for the fuzzer to complete. # Note that the 'wait || ...' thing is required so that the script doesn't # exit because of 'set -e' when 'wait' returns nonzero code. @@ -266,8 +229,6 @@ quit wait "$fuzzer_pid" || fuzzer_exit_code=$? echo "Fuzzer exit code is $fuzzer_exit_code" - kill -- -$watchdog_pid ||: - # If the server dies, most often the fuzzer returns code 210: connetion # refused, and sometimes also code 32: attempt to read after eof. For # simplicity, check again whether the server is accepting connections, using @@ -333,6 +294,8 @@ quit pigz core.* mv core.*.gz core.gz fi + + dmesg -T | grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' && echo "OOM in dmesg" ||: } case "$stage" in diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 78f627bf45e..50e87f56732 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -131,7 +131,14 @@ function stop() # Preserve the pid, since the server can hung after the PID will be deleted. pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" - clickhouse stop --do-not-kill && return + # --max-tries is supported only since 22.12 + if dpkg --compare-versions "$(clickhouse local -q 'select version()')" ge "22.12"; then + # Increase default waiting timeout for sanitizers and debug builds + clickhouse stop --max-tries 180 --do-not-kill && return + else + clickhouse stop --do-not-kill && return + fi + # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. 
kill -TERM "$(pidof gdb)" ||: sleep 5 @@ -254,7 +261,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau start -./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" \ +./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \ || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv @@ -388,6 +395,11 @@ else rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: + # Turn on after 22.12 + rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: + # it uses recently introduced settings which previous versions may not have + rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: + start clickhouse-client --query="SELECT 'Server version: ', version()" @@ -448,11 +460,12 @@ else # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility echo "Check for Error messages in server log:" zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Code: 236. DB::Exception: Cancelled mutating parts" \ -e "REPLICA_IS_ALREADY_ACTIVE" \ - -e "REPLICA_IS_ALREADY_EXIST" \ + -e "REPLICA_ALREADY_EXISTS" \ -e "ALL_REPLICAS_LOST" \ -e "DDLWorker: Cannot parse DDL task query" \ -e "RaftInstance: failed to accept a rpc connection due to error 125" \ @@ -482,6 +495,7 @@ else -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ -e "Code: 269. 
DB::Exception: Destination table is myself" \ -e "Coordination::Exception: Connection loss" \ + -e "MutateFromLogEntryTask" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 57544bdc090..f1cf029e9a2 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update \ apt-transport-https \ apt-utils \ ca-certificates \ + curl \ dnsutils \ gnupg \ iputils-ping \ @@ -24,10 +25,16 @@ RUN apt-get update \ && echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \ && apt-key add /tmp/llvm-snapshot.gpg.key \ && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ - && echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ + && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ /etc/apt/sources.list \ && apt-get clean +# Install cmake 3.20+ for rust support +# Used https://askubuntu.com/a/1157132 as reference +RUN curl -s https://apt.kitware.com/keys/kitware-archive-latest.asc | \ + gpg --dearmor - > /etc/apt/trusted.gpg.d/kitware.gpg && \ + echo "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" >> /etc/apt/sources.list + # initial packages RUN apt-get update \ && apt-get install \ @@ -37,7 +44,6 @@ RUN apt-get update \ clang-${LLVM_VERSION} \ clang-tidy-${LLVM_VERSION} \ cmake \ - curl \ fakeroot \ gdb \ git \ diff --git a/docs/changelogs/v22.11.1.1360-stable.md b/docs/changelogs/v22.11.1.1360-stable.md new file mode 100644 index 00000000000..77ad54b4fd8 --- /dev/null +++ b/docs/changelogs/v22.11.1.1360-stable.md @@ -0,0 +1,249 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.11.1.1360-stable (0d211ed1984) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189) + +#### Backward Incompatible Change +* JSONExtract family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). + +#### New Feature +* - Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). +* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Add Hudi and DeltaLake table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do)). +* Add 4LW command `csnp` for manually creating snapshots. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). +* Support for keeper request retries during insert into replicated merge trees.
Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). +* Add function ascii like in spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). +* Add function pmod which return non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). +* Published function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). +* Added S3 PUTs and GETs request per second rate throttling. Settings `s3_max_get_rps`, `s3_max_get_burst`, `s3_max_put_rps`, `s3_max_put_burst` are used to configure token bucket throttler. Can be used with both S3 ObjectStorage and S3 table function. Different limits can be configured for different S3 disks or endpoints. [#43014](https://github.com/ClickHouse/ClickHouse/pull/43014) ([Sergei Trifonov](https://github.com/serxa)). +* Add table function hudi and deltaLake. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). +* Add function factorial, as in Impala or Spark. [#43110](https://github.com/ClickHouse/ClickHouse/pull/43110) ([李扬](https://github.com/taiyang-li)). +* Add function randCanonical, which is similar to rand function in spark or impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). + +#### Performance Improvement +* Currently, the only saturable operators are And and Or, and their code paths are affected by this change. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). +* Fixed slowness in JSONExtract with LowCardinality(String) tuples. [#42761](https://github.com/ClickHouse/ClickHouse/pull/42761) ([AlfVII](https://github.com/AlfVII)). +* Support parallel parsing for LineAsString input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). +* Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). +* Parallelized merging of `uniqExact` states for aggregation without a key, i.e. queries like `SELECT uniqExact(number) FROM table`. The improvement becomes noticeable when the number of unique keys approaches 10^6. Also `uniq` performance is slightly optimized. This closes [#4510](https://github.com/ClickHouse/ClickHouse/issues/4510). 
[#43072](https://github.com/ClickHouse/ClickHouse/pull/43072) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). +* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). +* ClickHouse Client and ClickHouse Local will show progress by default even in non-interactive mode. If `/dev/tty` is available, the progress will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). +* - Add `notLike` to key condition atom map, so condition like `NOT LIKE 'prefix%'` can use primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). +* Add support for FixedString input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). +* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). +* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). +* Added ** glob support for recursive directory traversal to filesystem and S3. resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add a new variable call `limit` in query_info, indicating whether this query is a limit-trivial query. If so, we will adjust the approximate total rows for later estimation. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). 
+* Implement `ATTACH` of `MergeTree` table for `s3_plain` disk (plus some fixes for `s3_plain`). [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). +* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). +* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). +* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). +* Refactor FunctionTokens to enable max tokens returned for related functions(default disabled). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). +* Added new field allow_readonly in system.table_functions to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Allow to use Date32 arguments for formatDateTime and FROM_UNIXTIME functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). +* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase the size of upload part exponentially for backup to S3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). +* When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). +* bugfix: [#42856](https://github.com/ClickHouse/ClickHouse/issues/42856) ignore Mysql binlog SAVEPOINT event. 
[#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). +* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add generic implementation for arbitrary structured named collections, access type and system.named_collections. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). +* add oss function and StorageOSS (This is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify the `INFORMATION_SCHEMA` tables in a way so that now ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Disable `deltaLake` and `hudi` table functions in readonly mode. [#43316](https://github.com/ClickHouse/ClickHouse/pull/43316) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Bug Fix +* Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix race for backup of tables in Lazy databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). +* fix skip_unavailable_shards does not work using s3Cluster table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). + +#### Build/Testing/Packaging Improvement +* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). +* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). +* use llvm `l64.lld` in macOS suppress ld warnings, close [#42282](https://github.com/ClickHouse/ClickHouse/issues/42282). 
[#42470](https://github.com/ClickHouse/ClickHouse/pull/42470) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). +* * Improve bugfix validation check: fix bug with skipping the check, port separate status in CI, run after check labels and style check. Close [#40349](https://github.com/ClickHouse/ClickHouse/issues/40349). [#42702](https://github.com/ClickHouse/ClickHouse/pull/42702) ([Vladimir C](https://github.com/vdimir)). +* Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Fix schema inference in s3Cluster and improve in hdfsCluster. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix retries while reading from http table engines / table function. (retrtiable errors could be retries more times than needed, non-retrialble errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. 
[#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). +* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix create Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). +* `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). +* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). +* Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). +* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). +* Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* * Fix incorrect saved_block_sample with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). +* A null pointer will be generated when select if as from ‘three table join’ , For example, the SQL:. [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). 
+* Fix memory sanitizer report in ClusterDiscovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). +* Fix datetime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Fixes for s3_plain disk that will allow to attach Wide parts. [#42950](https://github.com/ClickHouse/ClickHouse/pull/42950) ([Azat Khuzhin](https://github.com/azat)). +* Fix ATTACH TABLE in PostgreSQL database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). +* Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). +* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). +* - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Fix function if in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix decimal math overflow in parsing datetime with 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). +* The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed unexpected behaviour of Interval types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). +* * Fix logical error in `sumMap/minMap/maxMap` functions executing `TOTALS/ROLLUP/CUBE` on `NULL` values. Close [#43022](https://github.com/ClickHouse/ClickHouse/issues/43022). [#43232](https://github.com/ClickHouse/ClickHouse/pull/43232) ([Vladimir C](https://github.com/vdimir)). +* - Fix ubsan in AggregateFunctionMinMaxAny::read with high sizes. [#43249](https://github.com/ClickHouse/ClickHouse/pull/43249) ([Raúl Marín](https://github.com/Algunenano)). +* Fix IS (NOT) NULL operator priority in regard to other operators. 
[#43265](https://github.com/ClickHouse/ClickHouse/pull/43265) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### Build Improvement + +* ... Add support for format ipv6 on s390x. [#42412](https://github.com/ClickHouse/ClickHouse/pull/42412) ([Suzy Wang](https://github.com/SuzyWangIBMer)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Sonar Cloud Workflow"'. [#42725](https://github.com/ClickHouse/ClickHouse/pull/42725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert " Keeper retries during insert (clean)"'. [#43116](https://github.com/ClickHouse/ClickHouse/pull/43116) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert " Keeper retries during insert (clean)""'. [#43122](https://github.com/ClickHouse/ClickHouse/pull/43122) ([Igor Nikonov](https://github.com/devcrafter)). +* NO CL ENTRY: 'Revert "Optimize TTL merge, completely expired parts can be removed in time"'. [#43134](https://github.com/ClickHouse/ClickHouse/pull/43134) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Randomize keeper fault injection settings in stress tests"'. [#43218](https://github.com/ClickHouse/ClickHouse/pull/43218) ([Alexander Gololobov](https://github.com/davenger)). +* NO CL ENTRY: 'Revert "S3 request per second rate throttling"'. [#43306](https://github.com/ClickHouse/ClickHouse/pull/43306) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Better logging for docs builder [#41903](https://github.com/ClickHouse/ClickHouse/pull/41903) ([filimonov](https://github.com/filimonov)). +* Save full server log in AST Fuzzer checks [#42316](https://github.com/ClickHouse/ClickHouse/pull/42316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Build with libcxx(abi) 15 [#42513](https://github.com/ClickHouse/ClickHouse/pull/42513) ([Robert Schulze](https://github.com/rschu1ze)). +* Sonar Cloud Workflow [#42534](https://github.com/ClickHouse/ClickHouse/pull/42534) ([Julio Jimenez](https://github.com/juliojimenez)). +* Invalid type in where for Merge table (logical error) [#42576](https://github.com/ClickHouse/ClickHouse/pull/42576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix frequent memory drift message and clarify things in comments [#42582](https://github.com/ClickHouse/ClickHouse/pull/42582) ([Azat Khuzhin](https://github.com/azat)). +* Add functions for PowerBI connect [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Try to save `IDataPartStorage` interface [#42618](https://github.com/ClickHouse/ClickHouse/pull/42618) ([Anton Popov](https://github.com/CurtizJ)). +* Remove Ubuntu cruft [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer change setting into allow_experimental_analyzer [#42649](https://github.com/ClickHouse/ClickHouse/pull/42649) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer IQueryTreeNode remove getName method [#42651](https://github.com/ClickHouse/ClickHouse/pull/42651) ([Maksim Kita](https://github.com/kitaisreal)). +* Minor fix iotest_nonblock build [#42658](https://github.com/ClickHouse/ClickHouse/pull/42658) ([Jordi Villar](https://github.com/jrdi)). +* Add tests and doc for some url-related functions [#42664](https://github.com/ClickHouse/ClickHouse/pull/42664) ([Vladimir C](https://github.com/vdimir)). 
+* Update version_date.tsv and changelogs after v22.10.1.1875-stable [#42676](https://github.com/ClickHouse/ClickHouse/pull/42676) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix error handling in clickhouse_helper.py [#42678](https://github.com/ClickHouse/ClickHouse/pull/42678) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix execution of version_helper.py to use git tweaks [#42679](https://github.com/ClickHouse/ClickHouse/pull/42679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* MergeTree indexes use RPNBuilderTree [#42681](https://github.com/ClickHouse/ClickHouse/pull/42681) ([Maksim Kita](https://github.com/kitaisreal)). +* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Support optimize_syntax_fuse_functions for sum/count/avg via analyzer [#42689](https://github.com/ClickHouse/ClickHouse/pull/42689) ([Vladimir C](https://github.com/vdimir)). +* Update version after release [#42699](https://github.com/ClickHouse/ClickHouse/pull/42699) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.10.1.1877-stable [#42700](https://github.com/ClickHouse/ClickHouse/pull/42700) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* OrderByLimitByDuplicateEliminationPass improve performance [#42704](https://github.com/ClickHouse/ClickHouse/pull/42704) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer improve subqueries representation [#42705](https://github.com/ClickHouse/ClickHouse/pull/42705) ([Maksim Kita](https://github.com/kitaisreal)). +* Update version_date.tsv and changelogs after v22.9.4.32-stable [#42712](https://github.com/ClickHouse/ClickHouse/pull/42712) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.7.34-lts [#42713](https://github.com/ClickHouse/ClickHouse/pull/42713) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.7.7.24-stable [#42714](https://github.com/ClickHouse/ClickHouse/pull/42714) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Move SonarCloud Job to nightly [#42718](https://github.com/ClickHouse/ClickHouse/pull/42718) ([Julio Jimenez](https://github.com/juliojimenez)). +* Update version_date.tsv and changelogs after v22.8.8.3-lts [#42738](https://github.com/ClickHouse/ClickHouse/pull/42738) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Minor fix implicit cast CaresPTRResolver [#42747](https://github.com/ClickHouse/ClickHouse/pull/42747) ([Jordi Villar](https://github.com/jrdi)). +* Fix build on master [#42752](https://github.com/ClickHouse/ClickHouse/pull/42752) ([Igor Nikonov](https://github.com/devcrafter)). +* Update version_date.tsv and changelogs after v22.3.14.18-lts [#42759](https://github.com/ClickHouse/ClickHouse/pull/42759) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix anchor links [#42760](https://github.com/ClickHouse/ClickHouse/pull/42760) ([Sergei Trifonov](https://github.com/serxa)). +* Update version_date.tsv and changelogs after v22.3.14.23-lts [#42764](https://github.com/ClickHouse/ClickHouse/pull/42764) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update README.md [#42783](https://github.com/ClickHouse/ClickHouse/pull/42783) ([Yuko Takagi](https://github.com/yukotakagi)). 
+* Slightly better code with projections [#42794](https://github.com/ClickHouse/ClickHouse/pull/42794) ([Anton Popov](https://github.com/CurtizJ)). +* Fix some races in MergeTree [#42805](https://github.com/ClickHouse/ClickHouse/pull/42805) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix typo in comments [#42809](https://github.com/ClickHouse/ClickHouse/pull/42809) ([Gabriel](https://github.com/Gabriel39)). +* Fix compilation of LLVM with cmake cache [#42816](https://github.com/ClickHouse/ClickHouse/pull/42816) ([Azat Khuzhin](https://github.com/azat)). +* Fix link in docs [#42821](https://github.com/ClickHouse/ClickHouse/pull/42821) ([Sergei Trifonov](https://github.com/serxa)). +* Link to proper place in docs [#42822](https://github.com/ClickHouse/ClickHouse/pull/42822) ([Sergei Trifonov](https://github.com/serxa)). +* Fix argument type check in AggregateFunctionAnalysisOfVariance [#42823](https://github.com/ClickHouse/ClickHouse/pull/42823) ([Vladimir C](https://github.com/vdimir)). +* Tests/lambda analyzer [#42824](https://github.com/ClickHouse/ClickHouse/pull/42824) ([Denny Crane](https://github.com/den-crane)). +* Fix Missing Quotes - Sonar Nightly [#42831](https://github.com/ClickHouse/ClickHouse/pull/42831) ([Julio Jimenez](https://github.com/juliojimenez)). +* Add exclusions from the Snyk scan [#42834](https://github.com/ClickHouse/ClickHouse/pull/42834) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Missing Env Vars - Sonar Nightly [#42843](https://github.com/ClickHouse/ClickHouse/pull/42843) ([Julio Jimenez](https://github.com/juliojimenez)). +* Fix typo [#42855](https://github.com/ClickHouse/ClickHouse/pull/42855) ([GoGoWen](https://github.com/GoGoWen)). +* Add timezone to 02458_datediff_date32 [#42857](https://github.com/ClickHouse/ClickHouse/pull/42857) ([Vladimir C](https://github.com/vdimir)). +* Adjust cancel and rerun workflow names to the actual [#42862](https://github.com/ClickHouse/ClickHouse/pull/42862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer subquery in JOIN TREE with aggregation [#42865](https://github.com/ClickHouse/ClickHouse/pull/42865) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix getauxval for sanitizer builds [#42866](https://github.com/ClickHouse/ClickHouse/pull/42866) ([Amos Bird](https://github.com/amosbird)). +* Update version_date.tsv and changelogs after v22.10.2.11-stable [#42871](https://github.com/ClickHouse/ClickHouse/pull/42871) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Better usability for dashboard.html on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). +* Some fixes for ReplicatedMergeTree [#42878](https://github.com/ClickHouse/ClickHouse/pull/42878) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Validate Query Tree in debug [#42879](https://github.com/ClickHouse/ClickHouse/pull/42879) ([Dmitry Novik](https://github.com/novikd)). +* changed type name for s3 plain storage [#42890](https://github.com/ClickHouse/ClickHouse/pull/42890) ([Aleksandr](https://github.com/AVMusorin)). +* Cleanup implementation of regexpReplace(All|One) [#42907](https://github.com/ClickHouse/ClickHouse/pull/42907) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not show status for Bugfix validate check in non bugfix PRs [#42932](https://github.com/ClickHouse/ClickHouse/pull/42932) ([Vladimir C](https://github.com/vdimir)). 
+* fix(typo): Passible -> Possible [#42933](https://github.com/ClickHouse/ClickHouse/pull/42933) ([Yakko Majuri](https://github.com/yakkomajuri)). +* Pin the cryptography version to not break lambdas [#42934](https://github.com/ClickHouse/ClickHouse/pull/42934) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix: bad cast from type DB::ColumnLowCardinality to DB::ColumnString [#42937](https://github.com/ClickHouse/ClickHouse/pull/42937) ([Igor Nikonov](https://github.com/devcrafter)). +* Attach thread pool for loading parts to the query [#42947](https://github.com/ClickHouse/ClickHouse/pull/42947) ([Azat Khuzhin](https://github.com/azat)). +* Fix macOS M1 builds due to sprintf deprecation [#42962](https://github.com/ClickHouse/ClickHouse/pull/42962) ([Jordi Villar](https://github.com/jrdi)). +* Less use of CH-specific bit_cast() [#42968](https://github.com/ClickHouse/ClickHouse/pull/42968) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove some utils [#42972](https://github.com/ClickHouse/ClickHouse/pull/42972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a bug in CAST function parser [#42980](https://github.com/ClickHouse/ClickHouse/pull/42980) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix old bug to remove `refs/head` from ref name [#42981](https://github.com/ClickHouse/ClickHouse/pull/42981) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add debug information to nightly builds [#42997](https://github.com/ClickHouse/ClickHouse/pull/42997) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add some guard rails around aggregation memory management [#42999](https://github.com/ClickHouse/ClickHouse/pull/42999) ([Raúl Marín](https://github.com/Algunenano)). +* Add `on: workflow_call` to debug CI [#43000](https://github.com/ClickHouse/ClickHouse/pull/43000) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer added identifier typo corrections [#43002](https://github.com/ClickHouse/ClickHouse/pull/43002) ([Maksim Kita](https://github.com/kitaisreal)). +* Simple fixes for restart replica description [#43004](https://github.com/ClickHouse/ClickHouse/pull/43004) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup match code [#43006](https://github.com/ClickHouse/ClickHouse/pull/43006) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix TSan errors (correctly ignore _exit interception) [#43009](https://github.com/ClickHouse/ClickHouse/pull/43009) ([Azat Khuzhin](https://github.com/azat)). +* fix bandwidth throttlers initialization order [#43015](https://github.com/ClickHouse/ClickHouse/pull/43015) ([Sergei Trifonov](https://github.com/serxa)). +* Add test for issue [#42520](https://github.com/ClickHouse/ClickHouse/issues/42520) [#43027](https://github.com/ClickHouse/ClickHouse/pull/43027) ([Robert Schulze](https://github.com/rschu1ze)). +* Analyzer improve ARRAY JOIN with JOIN [#43048](https://github.com/ClickHouse/ClickHouse/pull/43048) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix projection part removal with zero-copy replication [#43060](https://github.com/ClickHouse/ClickHouse/pull/43060) ([alesapin](https://github.com/alesapin)). +* Fix msan warning [#43065](https://github.com/ClickHouse/ClickHouse/pull/43065) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer AST key condition crash fix [#43070](https://github.com/ClickHouse/ClickHouse/pull/43070) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Better logging for mark range filtering on projection parts [#43076](https://github.com/ClickHouse/ClickHouse/pull/43076) ([Duc Canh Le](https://github.com/canhld94)). +* Fix ub type punning [#43088](https://github.com/ClickHouse/ClickHouse/pull/43088) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Analyzer improve aliases support for table expressions [#43089](https://github.com/ClickHouse/ClickHouse/pull/43089) ([Maksim Kita](https://github.com/kitaisreal)). +* Throw not implemented for window frame type 'groups' in analyzer [#43090](https://github.com/ClickHouse/ClickHouse/pull/43090) ([Vladimir C](https://github.com/vdimir)). +* Disable clickhouse local and client non-interactive progress by default. [#43092](https://github.com/ClickHouse/ClickHouse/pull/43092) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make error message after dropping current user more correct. [#43097](https://github.com/ClickHouse/ClickHouse/pull/43097) ([Vitaly Baranov](https://github.com/vitlibar)). +* More stable test [#43102](https://github.com/ClickHouse/ClickHouse/pull/43102) ([alesapin](https://github.com/alesapin)). +* Rewrite tests for memory overcommit [#43105](https://github.com/ClickHouse/ClickHouse/pull/43105) ([Dmitry Novik](https://github.com/novikd)). +* Fix trailing \n from SQLancer status [#43114](https://github.com/ClickHouse/ClickHouse/pull/43114) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix `test_keeper_four_word_command::test_cmd_stat` [#43115](https://github.com/ClickHouse/ClickHouse/pull/43115) ([Antonio Andelic](https://github.com/antonio2368)). +* Enable keeper fault injection for inserts in functional tests [#43117](https://github.com/ClickHouse/ClickHouse/pull/43117) ([Igor Nikonov](https://github.com/devcrafter)). +* Analyzer aggregation crash fix [#43118](https://github.com/ClickHouse/ClickHouse/pull/43118) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer aggregation totals crash fix [#43119](https://github.com/ClickHouse/ClickHouse/pull/43119) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve commit_status_helper.py [#43121](https://github.com/ClickHouse/ClickHouse/pull/43121) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Skip hash logging on sanitizer builds [#43129](https://github.com/ClickHouse/ClickHouse/pull/43129) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer improve JOIN with constants [#43141](https://github.com/ClickHouse/ClickHouse/pull/43141) ([Maksim Kita](https://github.com/kitaisreal)). +* Remove POCO_CLICKHOUSE_PATCH [#43146](https://github.com/ClickHouse/ClickHouse/pull/43146) ([Azat Khuzhin](https://github.com/azat)). +* Update CompressionCodecDeflateQpl.cpp [#43150](https://github.com/ClickHouse/ClickHouse/pull/43150) ([Tiaonmmn](https://github.com/Tiaonmmn)). +* Randomize keeper fault injection settings in stress tests [#43187](https://github.com/ClickHouse/ClickHouse/pull/43187) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix for missing columns bug with projections an ALTER UPDATE [#43189](https://github.com/ClickHouse/ClickHouse/pull/43189) ([Alexander Gololobov](https://github.com/davenger)). +* A workaround for LLVM bug, https://github.com/llvm/llvm-project/issues/58633 [#43195](https://github.com/ClickHouse/ClickHouse/pull/43195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Stop `ConfigReloader` first to avoid data race [#43201](https://github.com/ClickHouse/ClickHouse/pull/43201) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix typo [#43203](https://github.com/ClickHouse/ClickHouse/pull/43203) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Miscellaneous changes [#43206](https://github.com/ClickHouse/ClickHouse/pull/43206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky 02449_check_dependencies_and_table_shutdown [#43212](https://github.com/ClickHouse/ClickHouse/pull/43212) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add test to check [#43167](https://github.com/ClickHouse/ClickHouse/issues/43167) for all builds [#43216](https://github.com/ClickHouse/ClickHouse/pull/43216) ([Ilya Yatsishin](https://github.com/qoega)). +* Don't throw if shared ID already created in `StorageReplicatedMergeTree` [#43244](https://github.com/ClickHouse/ClickHouse/pull/43244) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix nullptr dereference in collectScopeValidIdentifiersForTypoCorrection [#43245](https://github.com/ClickHouse/ClickHouse/pull/43245) ([Vladimir C](https://github.com/vdimir)). +* Better message in wait_zookeeper_to_start [#43256](https://github.com/ClickHouse/ClickHouse/pull/43256) ([Vladimir C](https://github.com/vdimir)). +* Make test_global_overcommit_tracker non-parallel [#43266](https://github.com/ClickHouse/ClickHouse/pull/43266) ([Dmitry Novik](https://github.com/novikd)). +* Rename canonicalRand to randCanonical [#43283](https://github.com/ClickHouse/ClickHouse/pull/43283) ([Nikita Taranov](https://github.com/nickitat)). +* check limits for an AST in select parser fuzzer [#43285](https://github.com/ClickHouse/ClickHouse/pull/43285) ([Sema Checherinda](https://github.com/CheSema)). +* Allow autoremoval of old parts if detach_not_byte_identical_parts enabled [#43287](https://github.com/ClickHouse/ClickHouse/pull/43287) ([filimonov](https://github.com/filimonov)). +* `pmod`: compatibility with Spark, better documentation [#43313](https://github.com/ClickHouse/ClickHouse/pull/43313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v22.8.9.24-lts.md b/docs/changelogs/v22.8.9.24-lts.md new file mode 100644 index 00000000000..e1f4c2bcdf0 --- /dev/null +++ b/docs/changelogs/v22.8.9.24-lts.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.8.9.24-lts (a1b69551d40) FIXME as compared to v22.8.8.3-lts (ac5a6cababc) + +#### Performance Improvement +* Backported in [#43012](https://github.com/ClickHouse/ClickHouse/issues/43012): Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Improvement +* Backported in [#42840](https://github.com/ClickHouse/ClickHouse/issues/42840): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+ +#### Build/Testing/Packaging Improvement +* Backported in [#42964](https://github.com/ClickHouse/ClickHouse/issues/42964): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#43040](https://github.com/ClickHouse/ClickHouse/issues/43040): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42720](https://github.com/ClickHouse/ClickHouse/issues/42720): Fixed `Unknown identifier (aggregate-function)` exception which appears when a user tries to calculate WINDOW ORDER BY/PARTITION BY expressions over aggregate functions: ``` CREATE TABLE default.tenk1 ( `unique1` Int32, `unique2` Int32, `ten` Int32 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; SELECT ten, sum(unique1) + sum(unique2) AS res, rank() OVER (ORDER BY sum(unique1) + sum(unique2) ASC) AS rank FROM _complex GROUP BY ten ORDER BY ten ASC; ``` which gives: ``` Code: 47. DB::Exception: Received from localhost:9000. DB::Exception: Unknown identifier: sum(unique1); there are columns: unique1, unique2, ten: While processing sum(unique1) + sum(unique2) ASC. (UNKNOWN_IDENTIFIER) ```. [#39762](https://github.com/ClickHouse/ClickHouse/pull/39762) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Backported in [#42748](https://github.com/ClickHouse/ClickHouse/issues/42748): A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#43062](https://github.com/ClickHouse/ClickHouse/issues/43062): Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. 
[#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Do not warn about kvm-clock [#41217](https://github.com/ClickHouse/ClickHouse/pull/41217) ([Sergei Trifonov](https://github.com/serxa)). +* Revert revert 41268 disable s3 parallel write for part moves to disk s3 [#42617](https://github.com/ClickHouse/ClickHouse/pull/42617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index db983ab9c68..484fd265c3d 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -127,6 +127,10 @@ The following settings can be set before query execution or placed into configur - `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. - `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. - `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. @@ -142,6 +146,7 @@ The following settings can be specified in configuration file for given endpoint - `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. - `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional. +- `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional. **Example:** diff --git a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 486c41c2496..21f857510f7 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -28,7 +28,7 @@ Engines: During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently. 
-- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md/#alter-mutations). +- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - Do not support indexes. diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 3b2431e4b5b..647395d0093 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -2,13 +2,20 @@ The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions). -The next query finds the closest neighbors in N-dimensional space using the L2 (Euclidean) distance: +The next queries find the closest neighbors in N-dimensional space using the L2 (Euclidean) distance: ``` sql SELECT * FROM table_name WHERE L2Distance(Column, Point) < MaxDistance LIMIT N ``` + +``` sql +SELECT * +FROM table_name +ORDER BY L2Distance(Column, Point) +LIMIT N +``` But it will take some time for execution because of the long calculation of the distance between `TargetEmbedding` and all other vectors. This is where ANN indexes can help. They store a compact approximation of the search space (e.g. using clustering, search trees, etc.) and are able to compute approximate neighbors quickly. ## Indexes Structure @@ -53,7 +60,7 @@ CREATE TABLE t ( `id` Int64, `number` Tuple(Float32, Float32, Float32), - INDEX x number TYPE annoy GRANULARITY N + INDEX x number TYPE index_name(parameters) GRANULARITY N ) ENGINE = MergeTree ORDER BY id; @@ -64,7 +71,7 @@ CREATE TABLE t ( `id` Int64, `number` Array(Float32), - INDEX x number TYPE annoy GRANULARITY N + INDEX x number TYPE index_name(parameters) GRANULARITY N ) ENGINE = MergeTree ORDER BY id; @@ -92,7 +99,7 @@ CREATE TABLE t ( id Int64, number Tuple(Float32, Float32, Float32), - INDEX x number TYPE annoy(T) GRANULARITY N + INDEX x number TYPE annoy(Trees, DistanceName) GRANULARITY N ) ENGINE = MergeTree ORDER BY id; @@ -103,7 +110,7 @@ CREATE TABLE t ( id Int64, number Array(Float32), - INDEX x number TYPE annoy(T) GRANULARITY N + INDEX x number TYPE annoy(Trees, DistanceName) GRANULARITY N ) ENGINE = MergeTree ORDER BY id; @@ -111,9 +118,19 @@ ORDER BY id; !!! note "Note" Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(number) = 256`. -Parameter `T` is the number of trees which algorithm will create. The bigger it is, the slower (approximately linear) it works (in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness). +Parameter `Trees` is the number of trees which algorithm will create. The bigger it is, the slower (approximately linear) it works (in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness). By default it is set to `100`. Parameter `DistanceName` is name of distance function. By default it is set to `L2Distance`. 
It can be set without changing first parameter, for example +```sql +CREATE TABLE t +( + id Int64, + number Array(Float32), + INDEX x number TYPE annoy('cosineDistance') GRANULARITY N +) +ENGINE = MergeTree +ORDER BY id; +``` -Annoy supports only `L2Distance`. +Annoy supports `L2Distance` and `cosineDistance`. In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7dfb5a9fed7..7614a09c018 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -537,7 +537,7 @@ TTL time_column TTL time_column + interval ``` -To define `interval`, use [time interval](/docs/en/sql-reference/operators/index.md/#operators-datetime) operators, for example: +To define `interval`, use [time interval](/docs/en/sql-reference/operators/index.md#operators-datetime) operators, for example: ``` sql TTL date_time + INTERVAL 1 MONTH @@ -860,7 +860,7 @@ The number of threads performing background moves of data parts can be changed b In the case of `MergeTree` tables, data is getting to disk in different ways: - As a result of an insert (`INSERT` query). -- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md/#alter-mutations). +- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - When downloading from another replica. - As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). @@ -940,6 +940,10 @@ Optional parameters: - `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. - `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. S3 disk can be configured as `main` or `cold` storage: ``` xml diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 67b595d0fa0..4867140789f 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -20,7 +20,7 @@ Replication works at the level of an individual table, not the entire server. A Replication does not depend on sharding. 
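As a minimal sketch of what table-level replication looks like in practice (column names and the ZooKeeper path below are illustrative; the exact engine parameters are described later on this page), each replica creates the same table with the `ReplicatedMergeTree` engine, pointing at a shared ZooKeeper path and its own replica name:

``` sql
CREATE TABLE hits_replicated
(
    `EventDate` Date,
    `CounterID` UInt32,
    `UserID` UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hits_replicated', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);
```

The `{shard}` and `{replica}` substitutions are assumed to be defined as macros in each server's configuration.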
Each shard has its own independent replication. -Compressed data for `INSERT` and `ALTER` queries is replicated (for more information, see the documentation for [ALTER](/docs/en/sql-reference/statements/alter/index.md/#query_language_queries_alter)). +Compressed data for `INSERT` and `ALTER` queries is replicated (for more information, see the documentation for [ALTER](/docs/en/sql-reference/statements/alter/index.md#query_language_queries_alter)). `CREATE`, `DROP`, `ATTACH`, `DETACH` and `RENAME` queries are executed on a single server and are not replicated: @@ -85,7 +85,7 @@ Example of setting the addresses of the auxiliary ZooKeeper cluster: ``` -To store table datameta in a auxiliary ZooKeeper cluster instead of default ZooKeeper cluster, we can use the SQL to create table with +To store table metadata in an auxiliary ZooKeeper cluster instead of default ZooKeeper cluster, we can use the SQL to create table with ReplicatedMergeTree engine as follow: ``` diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index 0e51a8b7696..a49214bd00a 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -59,7 +59,7 @@ Main use-cases for `Join`-engine tables are following: ### Deleting Data {#deleting-data} -`ALTER DELETE` queries for `Join`-engine tables are implemented as [mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations). `DELETE` mutation reads filtered data and overwrites data of memory and disk. +`ALTER DELETE` queries for `Join`-engine tables are implemented as [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations). `DELETE` mutation reads filtered data and overwrites data of memory and disk. ### Limitations and Settings {#join-limitations-and-settings} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 56708def497..3221b1a06fa 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -13,7 +13,7 @@ The supported formats are: | Format | Input | Output | |-------------------------------------------------------------------------------------------|------|--------| | [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | | [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | | [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | @@ -48,6 +48,7 @@ The supported formats are: | [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | | [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | | [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | +| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | | [TSKV](#tskv) | ✔ | ✔ | | [Pretty](#pretty) | ✗ | ✔ | | [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | @@ -1210,6 +1211,69 @@ SELECT * FROM json_each_row_nested - [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. - [output_format_json_validate_utf8](../operations/settings/settings.md#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. 
+## BSONEachRow {#bsoneachrow} + +In this format, ClickHouse formats/parses data as a sequence of BSON documents without any separator between them. +Each row is formatted as a single document and each column is formatted as a single BSON document field with column name as a key. + +For output it uses the following correspondence between ClickHouse types and BSON types: + +| ClickHouse type | BSON Type | +|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------| +| [Bool](../sql-reference/data-types/boolean.md) | `\x08` boolean | +| [Int8/UInt8](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int16UInt16](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int32](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [UInt32](../sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Int64/UInt64](../sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Float32/Float64](../sql-reference/data-types/float.md) | `\x01` double | +| [Date](../sql-reference/data-types/date.md)/[Date32](../sql-reference/data-types/date32.md) | `\x10` int32 | +| [DateTime](../sql-reference/data-types/datetime.md) | `\x12` int64 | +| [DateTime64](../sql-reference/data-types/datetime64.md) | `\x09` datetime | +| [Decimal32](../sql-reference/data-types/decimal.md) | `\x10` int32 | +| [Decimal64](../sql-reference/data-types/decimal.md) | `\x12` int64 | +| [Decimal128](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Decimal256](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [Int128/UInt128](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Int256/UInt256](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | +| [UUID](../sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | +| [Array](../sql-reference/data-types/array.md) | `\x04` array | +| [Tuple](../sql-reference/data-types/tuple.md) | `\x04` array | +| [Named Tuple](../sql-reference/data-types/tuple.md) | `\x03` document | +| [Map](../sql-reference/data-types/map.md) (with String keys) | `\x03` document | + +For input it uses the following correspondence between BSON types and ClickHouse types: + +| BSON Type | ClickHouse Type | +|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `\x01` double | [Float32/Float64](../sql-reference/data-types/float.md) | +| `\x02` string | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](../sql-reference/data-types/map.md)/[Named Tuple](../sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](../sql-reference/data-types/array.md)/[Tuple](../sql-reference/data-types/tuple.md) | +| `\x05` binary, `\x00` binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x02` old binary subtype 
| [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | +| `\x07` ObjectId | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x08` boolean | [Bool](../sql-reference/data-types/boolean.md) | +| `\x09` datetime | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `\x0A` null value | [NULL](../sql-reference/data-types/nullable.md) | +| `\x0D` JavaScript code | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x0E` symbol | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x10` int32 | [Int32/UInt32](../sql-reference/data-types/int-uint.md)/[Decimal32](../sql-reference/data-types/decimal.md) | +| `\x12` int64 | [Int64/UInt64](../sql-reference/data-types/int-uint.md)/[Decimal64](../sql-reference/data-types/decimal.md)/[DateTime64](../sql-reference/data-types/datetime64.md) | + +Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). +Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value. + +Note: this format don't work properly on Big-Endian platforms. + +### BSON format settings {#bson-format-settings} + +- [output_format_bson_string_as_string](../operations/settings/settings.md#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. +- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. + ## Native {#native} The most efficient format. Data is written and read by blocks in binary format. For each block, the number of rows, number of columns, column names and types, and parts of columns in this block are recorded one after another. In other words, this format is “columnar” – it does not convert columns to rows. This is the format used in the native interface for interaction between servers, for using the command-line client, and for C++ clients. diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 0324f742988..3f11cc3cf7b 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -57,7 +57,7 @@ Internal coordination settings are located in the `..` section and contain servers description. @@ -126,7 +126,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. 
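For example, assuming ClickHouse Keeper is listening on the default client port `9181` on the local host, a four-letter command can be issued with `nc`:

```
echo mntr | nc localhost 9181
```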
-The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif`. +The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`. You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -328,6 +328,12 @@ target_committed_log_idx 101 last_snapshot_idx 50 ``` +- `rqld`: Request to become new leader. Return `Sent leadership request to leader.` if request sent or `Failed to send leadership request to leader.` if request not sent. Note that if node is already leader the outcome is same as the request is sent. + +``` +Sent leadership request to leader. +``` + ## Migration from ZooKeeper {#migration-from-zookeeper} Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration: diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 8603257ea55..eee4058c230 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -26,7 +26,7 @@ Ways to configure settings, in order of priority: - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). - - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed. + - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed. Settings that can only be made in the server config file are not covered in this section. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7abe4affbd1..7494f3db71a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -276,7 +276,7 @@ Default value: 0. Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md/#create-default-values) instead of [NULL](../../sql-reference/syntax.md/#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable) data type. If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. -This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause. +This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#inserting-the-results-of-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause. 
Possible values: @@ -1619,8 +1619,8 @@ These functions can be transformed: - [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. - [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. - [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. -- [isNull](../../sql-reference/operators/index.md/#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. -- [isNotNull](../../sql-reference/operators/index.md/#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. - [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. - [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. - [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. @@ -2041,7 +2041,7 @@ Default value: 16. ## validate_polygons {#validate_polygons} -Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md/#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. +Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. Possible values: @@ -2227,7 +2227,7 @@ Default value: `0`. ## mutations_sync {#mutations_sync} -Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md/#mutations)) synchronously. +Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously. Possible values: @@ -2239,8 +2239,8 @@ Default value: `0`. **See Also** -- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) -- [Mutations](../../sql-reference/statements/alter/index.md/#mutations) +- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [Mutations](../../sql-reference/statements/alter/index.md#mutations) ## ttl_only_drop_parts {#ttl_only_drop_parts} @@ -4784,7 +4784,7 @@ Possible values: Default value: 1. -## SQLInsert format settings {$sqlinsert-format-settings} +## SQLInsert format settings {#sqlinsert-format-settings} ### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size} @@ -4815,3 +4815,17 @@ Default value: `false`. Quote column names with "`" characters Default value: `true`. 
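As a hedged sketch of how these settings combine (the exact text may vary slightly between versions, and the target table name shown is just the default), the `SQLInsert` output format renders result rows as `INSERT` statements:

``` sql
SELECT number AS x FROM numbers(3) FORMAT SQLInsert
```

``` text
INSERT INTO table (`x`) VALUES (0), (1), (2);
```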
+ +## BSONEachRow format settings {#bson-each-row-format-settings} + +### output_format_bson_string_as_string {#output_format_bson_string_as_string} + +Use BSON String type instead of Binary for String columns. + +Disabled by default. + +### input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types while schema inference for format BSONEachRow. + +Disabled by default. diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index 0d3b764846b..d8fb91a63f5 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/mutations --- # mutations -The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. +The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. Columns: @@ -45,7 +45,7 @@ If there were problems with mutating some data parts, the following columns cont **See Also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) - [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine - [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index cbabd9b27b1..bbd5385f44b 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -9,7 +9,7 @@ Each row describes one data part. Columns: -- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md/#query_language_queries_alter) query. +- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. Formats: diff --git a/docs/en/operations/system-tables/parts_columns.md b/docs/en/operations/system-tables/parts_columns.md index d934e01f245..68757ddfbff 100644 --- a/docs/en/operations/system-tables/parts_columns.md +++ b/docs/en/operations/system-tables/parts_columns.md @@ -9,7 +9,7 @@ Each row describes one data part. Columns: -- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md/#query_language_queries_alter) query. +- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. 
Formats: diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 827a7e33ea3..0201462c0b6 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -189,10 +189,12 @@ preAllocSize=131072 # especially if there are a lot of clients. To prevent ZooKeeper from running # out of memory due to queued requests, ZooKeeper will throttle clients so that # there is no more than globalOutstandingLimit outstanding requests in the -# system. The default limit is 1,000.ZooKeeper logs transactions to a -# transaction log. After snapCount transactions are written to a log file a -# snapshot is started and a new transaction log file is started. The default -# snapCount is 10,000. +# system. The default limit is 1000. +# globalOutstandingLimit=1000 + +# ZooKeeper logs transactions to a transaction log. After snapCount transactions +# are written to a log file a snapshot is started and a new transaction log file +# is started. The default snapCount is 100000. snapCount=3000000 # If this option is defined, requests will be will logged to a trace file named diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index c8c7470d2cb..db41fdf2fc3 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -6,7 +6,7 @@ sidebar_label: Date32 # Date32 -A date. Supports the date range same with [DateTime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1900-01-01. Allows storing values till 2299-12-31. +A date. Supports the date range same with [DateTime64](../../sql-reference/data-types/datetime64.md). Stored as a signed 32-bit integer in native byte order with the value representing the days since 1970-01-01 (0 represents 1970-01-01 and negative values represent the days before 1970). **Examples** diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 06b5b8a6746..9f922a2cccb 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -7,7 +7,9 @@ import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dict # Dictionaries - +:::tip Tutorial +If you are getting started with Dictionaries in ClickHouse we have a tutorial that covers that topic. Take a look [here](/docs/en/tutorial.md). +::: You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. @@ -27,6 +29,8 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl - Configuration parameters. - Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. 
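For example, the status and basic metrics of loaded dictionaries can be inspected directly (a minimal sketch; the system table has many more columns):

``` sql
SELECT name, status, type, element_count, last_exception
FROM system.dictionaries;
```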
+ + ## Creating a dictionary with a DDL query Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md), and this is the recommended method because with DDL created dictionaries: diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index ece50591ef9..56f3a88b28b 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -161,3 +161,140 @@ Result: │ -1 │ └─────────────┘ ``` + +## multiplyDecimal(a, b[, result_scale]) + +Performs multiplication on two decimals. Result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). +Result scale can be explicitly specified by `result_scale` argument (const Integer in range `[0, 76]`). If not specified, the result scale is the max scale of given arguments. + +:::note +These functions work significantly slower than usual `multiply`. +In case you don't really need controlled precision and/or need fast computation, consider using [multiply](#multiply) +::: + +**Syntax** + +```sql +multiplyDecimal(a, b[, result_scale]) +``` + +**Arguments** + +- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). +- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). +- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- The result of multiplication with given scale. + +Type: [Decimal256](../../sql-reference/data-types/decimal.md). + +**Example** + +```text +┌─multiplyDecimal(toDecimal256(-12, 0), toDecimal32(-2.1, 1), 1)─┐ +│ 25.2 │ +└────────────────────────────────────────────────────────────────┘ +``` + +**Difference from regular multiplication:** +```sql +SELECT toDecimal64(-12.647, 3) * toDecimal32(2.1239, 4); +SELECT toDecimal64(-12.647, 3) as a, toDecimal32(2.1239, 4) as b, multiplyDecimal(a, b); +``` + +```text +┌─multiply(toDecimal64(-12.647, 3), toDecimal32(2.1239, 4))─┐ +│ -26.8609633 │ +└───────────────────────────────────────────────────────────┘ +┌─multiplyDecimal(toDecimal64(-12.647, 3), toDecimal32(2.1239, 4))─┐ +│ -26.8609 │ +└──────────────────────────────────────────────────────────────────┘ +``` + +```sql +SELECT + toDecimal64(-12.647987876, 9) AS a, + toDecimal64(123.967645643, 9) AS b, + multiplyDecimal(a, b); + +SELECT + toDecimal64(-12.647987876, 9) AS a, + toDecimal64(123.967645643, 9) AS b, + a * b; +``` + +```text +┌─────────────a─┬─────────────b─┬─multiplyDecimal(toDecimal64(-12.647987876, 9), toDecimal64(123.967645643, 9))─┐ +│ -12.647987876 │ 123.967645643 │ -1567.941279108 │ +└───────────────┴───────────────┴───────────────────────────────────────────────────────────────────────────────┘ + +Received exception from server (version 22.11.1): +Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal math overflow: While processing toDecimal64(-12.647987876, 9) AS a, toDecimal64(123.967645643, 9) AS b, a * b. (DECIMAL_OVERFLOW) +``` + +## divideDecimal(a, b[, result_scale]) + +Performs division on two decimals. Result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). +Result scale can be explicitly specified by `result_scale` argument (const Integer in range `[0, 76]`). If not specified, the result scale is the max scale of given arguments. + +:::note +These function work significantly slower than usual `divide`. 
+In case you don't really need controlled precision and/or need fast computation, consider using [divide](#divide). +::: + +**Syntax** + +```sql +divideDecimal(a, b[, result_scale]) +``` + +**Arguments** + +- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). +- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). +- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- The result of division with given scale. + +Type: [Decimal256](../../sql-reference/data-types/decimal.md). + +**Example** + +```text +┌─divideDecimal(toDecimal256(-12, 0), toDecimal32(2.1, 1), 10)─┐ +│ -5.7142857142 │ +└──────────────────────────────────────────────────────────────┘ +``` + +**Difference from regular division:** +```sql +SELECT toDecimal64(-12, 1) / toDecimal32(2.1, 1); +SELECT toDecimal64(-12, 1) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5); +``` + +```text +┌─divide(toDecimal64(-12, 1), toDecimal32(2.1, 1))─┐ +│ -5.7 │ +└──────────────────────────────────────────────────┘ + +┌───a─┬───b─┬─divideDecimal(toDecimal64(-12, 1), toDecimal32(2.1, 1), 1)─┬─divideDecimal(toDecimal64(-12, 1), toDecimal32(2.1, 1), 5)─┐ +│ -12 │ 2.1 │ -5.7 │ -5.71428 │ +└─────┴─────┴────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +```sql +SELECT toDecimal64(-12, 0) / toDecimal32(2.1, 1); +SELECT toDecimal64(-12, 0) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5); +``` + +```text +DB::Exception: Decimal result's scale is less than argument's one: While processing toDecimal64(-12, 0) / toDecimal32(2.1, 1). (ARGUMENT_OUT_OF_BOUND) + +┌───a─┬───b─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 1)─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 5)─┐ +│ -12 │ 2.1 │ -5.7 │ -5.71428 │ +└─────┴─────┴────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 88d6c2f3e17..293e02f8a54 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -474,13 +474,13 @@ Calculates the cosine distance between two vectors (the values of the tuples are **Syntax** ```sql -cosineDistance(tuple1, tuple2) +cosineDistance(vector1, vector2) ``` **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -488,7 +488,7 @@ cosineDistance(tuple1, tuple2) Type: [Float](../../sql-reference/data-types/float.md). -**Example** +**Examples** Query: diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 4a6e46e1759..cccc02c2553 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -185,7 +185,7 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. 
Type: [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 08f2620a009..4efa2131eb6 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -24,7 +24,7 @@ Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type Uses a linear congruential generator. -## canonicalRand +## randCanonical The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). Non-deterministic. Return type is Float64. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 6bca0dbff42..ae8671ffa9d 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -254,7 +254,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#insert_query_insert-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. 
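The copy-and-rename workflow described above can be sketched as follows; the table, columns, engine and keys are hypothetical and should be adapted to the real schema:

``` sql
-- 1. Create a new table with the desired structure.
CREATE TABLE hits_new
(
    `UserID` UInt64,
    `URL` String,
    `EventTime` DateTime
)
ENGINE = MergeTree
ORDER BY (UserID, EventTime);

-- 2. Copy the data into it.
INSERT INTO hits_new SELECT UserID, URL, EventTime FROM hits;

-- 3. Swap the tables and remove the old one.
RENAME TABLE hits TO hits_old, hits_new TO hits;
DROP TABLE hits_old;
```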
diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 4dcab030d13..30ed96c0b9c 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -10,7 +10,7 @@ sidebar_label: DELETE ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr ``` -Deletes data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Deletes data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). :::note @@ -25,6 +25,6 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 146c15e776e..a8cea63380c 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -270,7 +270,7 @@ ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd' ## UPDATE IN PARTITION -Manipulates data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Manipulates data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). Syntax: @@ -290,7 +290,7 @@ ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2; ## DELETE IN PARTITION -Deletes data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Deletes data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). Syntax: diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index 99cb8fb8fd1..908d28d7ab1 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -11,6 +11,14 @@ Projections store data in a format that optimizes query execution, this feature You can define one or more projections for a table, and during the query analysis the projection with the least data to scan will be selected by ClickHouse without modifying the query provided by the user. +:::note Disk usage + +Projections will create internally a new hidden table, this means that more IO and space on disk will be required. +Example, If the projection has defined a different primary key, all the data from the original table will be duplicated. +::: + +You can see more technical details about how projections work internally on this [page](/docs/en/guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-multiple.md/#option-3-projections). 
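To make the disk-usage note concrete, the following hedged sketch (hypothetical table, projection and partition names) adds a projection whose sort order differs from the table's primary key, so the selected columns are stored a second time inside each part:

``` sql
ALTER TABLE visits ADD PROJECTION by_url
(
    SELECT UserID, URL
    ORDER BY URL
);

-- Rebuild the projection for data that was inserted before it was defined.
ALTER TABLE visits MATERIALIZE PROJECTION by_url IN PARTITION 201905;
```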
+ ## Example filtering without using primary keys Creating the table: @@ -138,15 +146,15 @@ The following operations with [projections](/docs/en/engines/table-engines/merge ## DROP PROJECTION -`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). ## MATERIALIZE PROJECTION -`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). ## CLEAR PROJECTION -`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files. diff --git a/docs/en/sql-reference/statements/alter/skipping-index.md b/docs/en/sql-reference/statements/alter/skipping-index.md index 2dadffc4527..037e4bc38c5 100644 --- a/docs/en/sql-reference/statements/alter/skipping-index.md +++ b/docs/en/sql-reference/statements/alter/skipping-index.md @@ -14,7 +14,7 @@ The following operations are available: - `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. -- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. +- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. The first two commands are lightweight in a sense that they only change metadata or remove files. diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index f40b72f7ab3..5d27c382982 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -10,7 +10,7 @@ sidebar_label: UPDATE ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr ``` -Manipulates data matching the specified filtering expression. 
Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Manipulates data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). :::note The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. @@ -24,7 +24,7 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 13188d620b9..b69d09dd266 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -72,7 +72,7 @@ If you specify `POPULATE`, the existing table data is inserted into the view whe A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data won’t be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`. -The execution of [ALTER](../../../sql-reference/statements/alter/view.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. +The execution of [ALTER](/docs/en/sql-reference/statements/alter/view.md) queries on materialized views has limitations, for example, you can not update the `SELECT` query, so this might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view. diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index f4a6ccb0c7d..5649486905e 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -10,7 +10,7 @@ Shows the execution plan of a statement. Syntax: ```sql -EXPLAIN [AST | SYNTAX | PLAN | PIPELINE | ESTIMATE | TABLE OVERRIDE] [setting = value, ...] +EXPLAIN [AST | SYNTAX | QUERY TREE | PLAN | PIPELINE | ESTIMATE | TABLE OVERRIDE] [setting = value, ...] [ SELECT ... | tableFunction(...) [COLUMNS (...)] [ORDER BY ...] [PARTITION BY ...] [PRIMARY KEY] [SAMPLE BY ...] [TTL ...] @@ -47,6 +47,7 @@ Union - `AST` — Abstract syntax tree. - `SYNTAX` — Query text after AST-level optimizations. 
+- `QUERY TREE` — Query tree after Query Tree level optimizations. - `PLAN` — Query execution plan. - `PIPELINE` — Query execution pipeline. @@ -110,6 +111,32 @@ FROM CROSS JOIN system.numbers AS c ``` +### EXPLAIN QUERY TREE + +Settings: + +- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`. +- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`. +- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`. + +Example: +```sql +EXPLAIN QUERY TREE SELECT id, value FROM test_table; +``` + +``` +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + value String + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: value, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test_table +``` + ### EXPLAIN PLAN Dump query plan steps. diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index ac02e9ab5a1..2df8581c447 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -243,6 +243,54 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../../../sql-reference/statements/select/join.md) clause (in this case, the respective total values are combined). +## GROUP BY ALL + +`GROUP BY ALL` is equivalent to listing all the SELECT-ed expressions that are not aggregate functions. + +For example: + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY ALL +``` + +is the same as + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY a * 2, b +``` + +For a special case that if there is a function having both aggregate functions and other fields as its arguments, the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it. + +For example: + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY ALL +``` + +is the same as + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY substring(a, 4, 2), substring(a, 1, 2) +``` + ## Examples Example: diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md new file mode 100644 index 00000000000..4d1488ea640 --- /dev/null +++ b/docs/en/sql-reference/table-functions/format.md @@ -0,0 +1,75 @@ +--- +slug: /en/sql-reference/table-functions/format +sidebar_position: 56 +sidebar_label: format +--- + +# format + +Extracts table structure from data and parses it according to specified input format. + +**Syntax** + +``` sql +format(format_name, data) +``` + +**Parameters** + +- `format_name` — The [format](../../interfaces/formats.md#formats) of the data. +- `data` — String literal or constant expression that returns a string containing data in specified format + +**Returned value** + +A table with data parsed from `data` argument according specified format and extracted schema. 
+ +**Examples** + +**Query:** +``` sql +:) select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌───b─┬─a─────┐ +│ 111 │ Hello │ +│ 123 │ World │ +│ 112 │ Hello │ +│ 124 │ World │ +└─────┴───────┘ +``` + +**Query:** +```sql + +:) desc format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ b │ Nullable(Float64) │ │ │ │ │ │ +│ a │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +**See Also** + +- [Formats](../../interfaces/formats.md) + +[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/format) diff --git a/docs/ru/operations/external-authenticators/kerberos.md b/docs/ru/operations/external-authenticators/kerberos.md index 7b0702b2132..865ea639c89 100644 --- a/docs/ru/operations/external-authenticators/kerberos.md +++ b/docs/ru/operations/external-authenticators/kerberos.md @@ -98,7 +98,7 @@ ClickHouse предоставляет возможность аутентифи :::danger "Важно" - Если пользователь настроен для Kerberos-аутентификации, другие виды уатентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу. + Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу. :::info "" Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`. diff --git a/docs/ru/operations/settings/index.md b/docs/ru/operations/settings/index.md index 4e055405847..6806aea5135 100644 --- a/docs/ru/operations/settings/index.md +++ b/docs/ru/operations/settings/index.md @@ -24,7 +24,7 @@ slug: /ru/operations/settings/ - При запуске консольного клиента ClickHouse в не интерактивном режиме установите параметр запуска `--setting=value`. - При использовании HTTP API передавайте cgi-параметры (`URL?setting_1=value&setting_2=value...`). - - Укажите необходимые настройки в секции [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) запроса SELECT. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. + - Укажите необходимые настройки в секции [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) запроса SELECT. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. Настройки, которые можно задать только в конфигурационном файле сервера, в разделе не рассматриваются. 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a070dbd5e10..58894611386 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -479,7 +479,7 @@ SELECT * FROM table_with_enum_column_for_tsv_insert; Включает или отключает вставку [значений по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) вместо [NULL](../../sql-reference/syntax.md#null-literal) в столбцы, которые не позволяют [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable). Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки. -Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`. +Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`. Возможные значения: diff --git a/docs/ru/sql-reference/functions/arithmetic-functions.md b/docs/ru/sql-reference/functions/arithmetic-functions.md index bc1d0a55128..4e040edcc70 100644 --- a/docs/ru/sql-reference/functions/arithmetic-functions.md +++ b/docs/ru/sql-reference/functions/arithmetic-functions.md @@ -159,3 +159,150 @@ SELECT min2(-1, 2); └─────────────┘ ``` +## multiplyDecimal(a, b[, result_scale]) + +Совершает умножение двух Decimal. Результат будет иметь тип [Decimal256](../../sql-reference/data-types/decimal.md). +Scale (размер дробной части) результат можно явно задать аргументом `result_scale` (целочисленная константа из интервала `[0, 76]`). +Если этот аргумент не задан, то scale результата будет равен наибольшему из scale обоих аргументов. + +**Синтаксис** + +```sql +multiplyDecimal(a, b[, result_scale]) +``` + +:::note +Эта функция работают гораздо медленнее обычной `multiply`. +В случае, если нет необходимости иметь фиксированную точность и/или нужны быстрые вычисления, следует использовать [multiply](#multiply). +::: + +**Аргументы** + +- `a` — Первый сомножитель/делимое: [Decimal](../../sql-reference/data-types/decimal.md). +- `b` — Второй сомножитель/делитель: [Decimal](../../sql-reference/data-types/decimal.md). +- `result_scale` — Scale результата: [Int/UInt](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Результат умножения с заданным scale. + +Тип: [Decimal256](../../sql-reference/data-types/decimal.md). 
+ +**Примеры** + +```sql +SELECT multiplyDecimal(toDecimal256(-12, 0), toDecimal32(-2.1, 1), 1); +``` + +```text +┌─multiplyDecimal(toDecimal256(-12, 0), toDecimal32(-2.1, 1), 1)─┐ +│ 25.2 │ +└────────────────────────────────────────────────────────────────┘ +``` + +**Отличие от стандартных функций** +```sql +SELECT toDecimal64(-12.647, 3) * toDecimal32(2.1239, 4); +SELECT toDecimal64(-12.647, 3) as a, toDecimal32(2.1239, 4) as b, multiplyDecimal(a, b); +``` + +```text +┌─multiply(toDecimal64(-12.647, 3), toDecimal32(2.1239, 4))─┐ +│ -26.8609633 │ +└───────────────────────────────────────────────────────────┘ +┌─multiplyDecimal(toDecimal64(-12.647, 3), toDecimal32(2.1239, 4))─┐ +│ -26.8609 │ +└──────────────────────────────────────────────────────────────────┘ +``` + +```sql +SELECT + toDecimal64(-12.647987876, 9) AS a, + toDecimal64(123.967645643, 9) AS b, + multiplyDecimal(a, b); + +SELECT + toDecimal64(-12.647987876, 9) AS a, + toDecimal64(123.967645643, 9) AS b, + a * b; +``` + +```text +┌─────────────a─┬─────────────b─┬─multiplyDecimal(toDecimal64(-12.647987876, 9), toDecimal64(123.967645643, 9))─┐ +│ -12.647987876 │ 123.967645643 │ -1567.941279108 │ +└───────────────┴───────────────┴───────────────────────────────────────────────────────────────────────────────┘ + +Received exception from server (version 22.11.1): +Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal math overflow: While processing toDecimal64(-12.647987876, 9) AS a, toDecimal64(123.967645643, 9) AS b, a * b. (DECIMAL_OVERFLOW) +``` + +## divideDecimal(a, b[, result_scale]) + +Совершает деление двух Decimal. Результат будет иметь тип [Decimal256](../../sql-reference/data-types/decimal.md). +Scale (размер дробной части) результат можно явно задать аргументом `result_scale` (целочисленная константа из интервала `[0, 76]`). +Если этот аргумент не задан, то scale результата будет равен наибольшему из scale обоих аргументов. + +**Синтаксис** + +```sql +divideDecimal(a, b[, result_scale]) +``` + +:::note +Эта функция работает гораздо медленнее обычной `divide`. +В случае, если нет необходимости иметь фиксированную точность и/или нужны быстрые вычисления, следует использовать [divide](#divide). +::: + +**Аргументы** + +- `a` — Первый сомножитель/делимое: [Decimal](../../sql-reference/data-types/decimal.md). +- `b` — Второй сомножитель/делитель: [Decimal](../../sql-reference/data-types/decimal.md). +- `result_scale` — Scale результата: [Int/UInt](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Результат деления с заданным scale. + +Тип: [Decimal256](../../sql-reference/data-types/decimal.md). 
+ +**Примеры** + +```sql +SELECT divideDecimal(toDecimal256(-12, 0), toDecimal32(2.1, 1), 10); +``` + +```text +┌─divideDecimal(toDecimal256(-12, 0), toDecimal32(2.1, 1), 10)─┐ +│ -5.7142857142 │ +└──────────────────────────────────────────────────────────────┘ +``` + +**Отличие от стандартных функций** +```sql +SELECT toDecimal64(-12, 1) / toDecimal32(2.1, 1); +SELECT toDecimal64(-12, 1) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5); +``` + +```text +┌─divide(toDecimal64(-12, 1), toDecimal32(2.1, 1))─┐ +│ -5.7 │ +└──────────────────────────────────────────────────┘ + +┌───a─┬───b─┬─divideDecimal(toDecimal64(-12, 1), toDecimal32(2.1, 1), 1)─┬─divideDecimal(toDecimal64(-12, 1), toDecimal32(2.1, 1), 5)─┐ +│ -12 │ 2.1 │ -5.7 │ -5.71428 │ +└─────┴─────┴────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +```sql +SELECT toDecimal64(-12, 0) / toDecimal32(2.1, 1); +SELECT toDecimal64(-12, 0) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5); +``` + +```text +DB::Exception: Decimal result's scale is less than argument's one: While processing toDecimal64(-12, 0) / toDecimal32(2.1, 1). (ARGUMENT_OUT_OF_BOUND) + +┌───a─┬───b─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 1)─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 5)─┐ +│ -12 │ 2.1 │ -5.7 │ -5.71428 │ +└─────┴─────┴────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 11ec72596c4..a8ace213075 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`). -Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). +Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`. 
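The workaround described at the end of this hunk — build a new table, copy the data, then swap names — follows a fixed pattern. A minimal sketch with hypothetical table and column names (not part of the patch):

```sql
-- Create a table with the desired structure, copy the data with INSERT SELECT,
-- swap the names, and drop the old table.
CREATE TABLE hits_new (d Date, id UInt64, value String) ENGINE = MergeTree ORDER BY id;
INSERT INTO hits_new SELECT d, id, value FROM hits;
RENAME TABLE hits TO hits_old, hits_new TO hits;
DROP TABLE hits_old;
```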
diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 573b8d39926..4fa6ac4ce66 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -95,7 +95,7 @@ INSERT INTO t FORMAT TabSeparated Если в таблице объявлены [ограничения](../../sql-reference/statements/create/table.md#constraints), то их выполнимость будет проверена для каждой вставляемой строки. Если для хотя бы одной строки ограничения не будут выполнены, запрос будет остановлен. -### Вставка результатов `SELECT` {#insert_query_insert-select} +### Вставка результатов `SELECT` {#inserting-the-results-of-select} **Синтаксис** diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index 4479e24000b..f360a09eb10 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -270,7 +270,7 @@ SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers; └─────────────────┴────────┘ ``` -## SETTINGS в запросе SELECT {#settings-in-select} +## SETTINGS в запросе SELECT {#settings-in-select-query} Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md new file mode 120000 index 00000000000..cc5e3a5a142 --- /dev/null +++ b/docs/ru/sql-reference/table-functions/format.md @@ -0,0 +1 @@ +../../../en/sql-reference/table-functions/format.md \ No newline at end of file diff --git a/docs/zh/engines/table-engines/integrations/kafka.md b/docs/zh/engines/table-engines/integrations/kafka.md index 707ee962ace..c6f11d9efce 100644 --- a/docs/zh/engines/table-engines/integrations/kafka.md +++ b/docs/zh/engines/table-engines/integrations/kafka.md @@ -74,7 +74,7 @@ Kafka 特性: 消费的消息会被自动追踪,因此每个消息在不同的消费组里只会记录一次。如果希望获得两次数据,则使用另一个组名创建副本。 -消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 http://kafka.apache.org/intro。 +消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 [http://kafka.apache.org/intro](http://kafka.apache.org/intro)。 `SELECT` 查询对于读取消息并不是很有用(调试除外),因为每条消息只能被读取一次。使用物化视图创建实时线程更实用。您可以这样做: diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 13b4c368a96..e773a02fbc3 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -164,7 +164,7 @@ SETTINGS index_granularity = 8192, index_granularity_bytes = 0;
  • index_granularity: 显式设置为其默认值8192。这意味着对于每一组8192行,主索引将有一个索引条目,例如,如果表包含16384行,那么索引将有两个索引条目。

  • -
  • index_granularity_bytes: 设置为0表示禁止字适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目 +
  • index_granularity_bytes: 设置为0表示禁止自适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目
    • 如果n小于8192,但n行的合并行数据大小大于或等于10MB (index_granularity_bytes的默认值)或
    • n达到8192
    • @@ -777,7 +777,7 @@ ClickHouse现在创建了一个额外的索引来存储—每组4个连续的颗 如果我们想显著加快我们的两个示例查询——一个过滤具有特定UserID的行,一个过滤具有特定URL的行——那么我们需要使用多个主索引,通过使用这三个方法中的一个: - 新建一个不同主键的新表。 -- 创建一个雾化视图。 +- 创建一个物化视图。 - 增加projection。 这三个方法都会有效地将示例数据复制到另一个表中,以便重新组织表的主索引和行排序顺序。 @@ -992,7 +992,7 @@ Ok. :::note - 我们在视图的主键中切换键列的顺序(与原始表相比) -- 雾化视图由一个隐藏表支持,该表的行顺序和主索引基于给定的主键定义 +- 物化视图由一个隐藏表支持,该表的行顺序和主索引基于给定的主键定义 - 我们使用POPULATE关键字,以便用源表hits_UserID_URL中的所有887万行立即导入新的物化视图 - 如果在源表hits_UserID_URL中插入了新行,那么这些行也会自动插入到隐藏表中 - 实际上,隐式创建的隐藏表的行顺序和主索引与我们上面显式创建的辅助表相同: @@ -1082,7 +1082,7 @@ ALTER TABLE hits_UserID_URL ); ``` -雾化projection: +物化projection: ```sql ALTER TABLE hits_UserID_URL MATERIALIZE PROJECTION prj_url_userid; diff --git a/docs/zh/sql-reference/functions/encoding-functions.md b/docs/zh/sql-reference/functions/encoding-functions.md index 5e9260ff94a..846d6c58f40 100644 --- a/docs/zh/sql-reference/functions/encoding-functions.md +++ b/docs/zh/sql-reference/functions/encoding-functions.md @@ -181,7 +181,7 @@ unhex(arg) **参数** -- `arg` — 包含任意数量的十六进制数字的字符串。类型为:[String](../../sql-reference/data-types/string.md)。 +- `arg` — 包含任意数量的十六进制数字的字符串。类型为:[String](../../sql-reference/data-types/string.md),[FixedString](../../sql-reference/data-types/fixedstring.md)。 支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数,则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。对于数字参数, unhex()不执行 hex(N) 的倒数。 diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 23edfd633db..fd73be4fd93 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) 不支持对primary key或者sampling key中的列(在 `ENGINE` 表达式中用到的列)进行删除操作。改变包含在primary key中的列的类型时,如果操作不会导致数据的变化(例如,往Enum中添加一个值,或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。 -如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 +如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 `ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index 69762bf43bc..f199329829c 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -90,7 +90,7 @@ INSERT INTO t FORMAT TabSeparated 如果表中有一些[限制](../../sql-reference/statements/create/table.mdx#constraints),,数据插入时会逐行进行数据校验,如果这里面包含了不符合限制条件的数据,服务将会抛出包含限制信息的异常,这个语句也会被停止执行。 -### 使用`SELECT`的结果写入 {#insert_query_insert-select} +### 使用`SELECT`的结果写入 {#inserting-the-results-of-select} ``` sql INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... 
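The `INSERT INTO ... SELECT` form shown at the end of the hunk above can be made concrete as follows; a minimal sketch with hypothetical table and column names:

```sql
-- Copy selected columns from one table into another; values are converted
-- to the target column types as in a regular INSERT.
INSERT INTO target_table (c1, c2)
SELECT c1, c2
FROM source_table
WHERE c1 > 0;
```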
diff --git a/docs/zh/sql-reference/statements/select/group-by.md b/docs/zh/sql-reference/statements/select/group-by.md index 90b3c7660ee..31c1649bc30 100644 --- a/docs/zh/sql-reference/statements/select/group-by.md +++ b/docs/zh/sql-reference/statements/select/group-by.md @@ -77,6 +77,54 @@ sidebar_label: GROUP BY 您可以使用 `WITH TOTALS` 在子查询中,包括在子查询 [JOIN](../../../sql-reference/statements/select/join.md) 子句(在这种情况下,将各自的总值合并)。 +## GROUP BY ALL {#group-by-all} + +`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的字段进行`GROUP BY`。 + +例如 + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY ALL +``` + +效果等同于 + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY a * 2, b +``` + +对于一种特殊情况,如果一个 function 的参数中同时有聚合函数和其他字段,会对参数中能提取的最大非聚合字段进行`GROUP BY`。 + +例如: + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY ALL +``` + +效果等同于 + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY substring(a, 4, 2), substring(a, 1, 2) +``` + ## 例子 {#examples} 示例: diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md new file mode 120000 index 00000000000..cc5e3a5a142 --- /dev/null +++ b/docs/zh/sql-reference/table-functions/format.md @@ -0,0 +1 @@ +../../../en/sql-reference/table-functions/format.md \ No newline at end of file diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 028b4fbf8ab..1581b95213e 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -9,7 +9,10 @@ After=time-sync.target network-online.target Wants=time-sync.target [Service] -Type=simple +Type=notify + +# Switching off watchdog is very important for sd_notify to work correctly. +Environment=CLICKHOUSE_WATCHDOG_ENABLE=0 User=clickhouse Group=clickhouse Restart=always diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index 00f5b26eddf..2923de44045 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -15,18 +15,26 @@ {display_name} :) - {display_name} \x01\e[1;32m\x02:)\x01\e[0m\x02 - {display_name} \x01\e[1;31m\x02:)\x01\e[0m\x02 + {display_name} \e[1;32m:)\e[0m + {display_name} \e[1;31m:)\e[0m + 1000000000000000 + + + + + 1 + localhost + 9234 + + + + diff --git a/tests/integration/test_drop_is_lock_free/configs/transactions.xml b/tests/integration/test_drop_is_lock_free/configs/transactions.xml new file mode 100644 index 00000000000..a8d3e8fbf6d --- /dev/null +++ b/tests/integration/test_drop_is_lock_free/configs/transactions.xml @@ -0,0 +1,14 @@ + + 42 + + + 100500 + 0 + + + + system + transactions_info_log
      + 7500 +
      +
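The new config above appears to route transaction events into the `transactions_info_log` table of the `system` database with a 7500 ms flush interval. Once populated, that log can be inspected with ordinary SQL; a sketch assuming the usual columns of ClickHouse log tables:

```sql
-- Most recent transaction log entries, assuming the standard event_time column.
SELECT *
FROM system.transactions_info_log
ORDER BY event_time DESC
LIMIT 10;
```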
      diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py new file mode 100644 index 00000000000..8d92d784226 --- /dev/null +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -0,0 +1,222 @@ +import time +import pytest +import logging +from contextlib import contextmanager +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + stay_alive=True, + with_zookeeper=False, + main_configs=[ + "configs/keeper.xml", + "configs/transactions.xml", + ], +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(scope="function") +def test_name(request): + return request.node.name + + +@pytest.fixture(scope="function") +def exclusive_table(test_name): + normalized = ( + test_name.replace("[", "_") + .replace("]", "_") + .replace(" ", "_") + .replace("-", "_") + ) + return "table_" + normalized + + +def get_event_select_count(): + return int( + node.query( + """ + SELECT value FROM system.events WHERE event = 'SelectQuery'; + """ + ) + ) + + +def get_query_processes_count(query_id): + q = f""" + SELECT count() FROM system.processes WHERE query_id = '{query_id}'; + """ + return q + + +def is_query_running(query_id): + return 1 == int(node.query(get_query_processes_count(query_id))) + + +def wait_select_start(query_id): + assert_eq_with_retry( + node, + get_query_processes_count(query_id), + "1\n", + ) + + +LOCK_FREE_QUERIES = { + "detach table": "DETACH TABLE {table};", + "drop part": "ALTER TABLE {table} DROP PART 'all_1_1_0';", + "detach part": "ALTER TABLE {table} DETACH PART 'all_1_1_0';", + "truncate": "TRUNCATE TABLE {table};", +} + + +@pytest.mark.parametrize( + "lock_free_query", LOCK_FREE_QUERIES.values(), ids=LOCK_FREE_QUERIES.keys() +) +def test_query_is_lock_free(lock_free_query, exclusive_table): + node.query( + f""" + CREATE TABLE {exclusive_table} + (a Int64) + Engine=MergeTree ORDER BY a; + """ + ) + node.query( + f""" + INSERT INTO {exclusive_table} SELECT number FROM numbers(50); + """ + ) + + query_id = "select-" + exclusive_table + + select_handler = node.get_query_request( + f""" + SELECT sleepEachRow(3) FROM {exclusive_table}; + """, + query_id=query_id, + ) + wait_select_start(query_id) + + for _ in [1, 2, 3, 4, 5]: + assert is_query_running(query_id) + assert select_handler.process.poll() is None + time.sleep(1) + + node.query(lock_free_query.format(table=exclusive_table)) + + assert is_query_running(query_id) + + if "DETACH TABLE" in lock_free_query: + result = node.query_and_get_error( + f""" + SELECT count() FROM {exclusive_table}; + """ + ) + assert f"Table default.{exclusive_table} doesn't exist" in result + else: + assert 0 == int( + node.query( + f""" + SELECT count() FROM {exclusive_table}; + """ + ) + ) + + +PERMANENT_QUERIES = { + "truncate": ("TRUNCATE TABLE {table};", 0), + "detach-partition-all": ("ALTER TABLE {table} DETACH PARTITION ALL;", 0), + "detach-part": ("ALTER TABLE {table} DETACH PARTITION '20221001';", 49), + "drop-part": ("ALTER TABLE {table} DROP PART '20220901_1_1_0';", 49), +} + + +@pytest.mark.parametrize( + "transaction", ["NoTx", "TxCommit", "TxRollback", "TxNotFinished"] +) +@pytest.mark.parametrize( + "permanent", PERMANENT_QUERIES.values(), ids=PERMANENT_QUERIES.keys() +) 
+def test_query_is_permanent(transaction, permanent, exclusive_table): + node.query( + f""" + CREATE TABLE {exclusive_table} + ( + a Int64, + date Date + ) + Engine=MergeTree + PARTITION BY date + ORDER BY a; + """ + ) + node.query( + f""" + INSERT INTO {exclusive_table} SELECT number, toDate('2022-09-01') + INTERVAL number DAY FROM numbers(50); + """ + ) + + query_id = "select-" + exclusive_table + + select_handler = node.get_query_request( + f""" + SELECT sleepEachRow(3) FROM {exclusive_table}; + """, + query_id=query_id, + ) + wait_select_start(query_id) + + for _ in [1, 2, 3, 4, 5]: + assert is_query_running(query_id) + assert select_handler.process.poll() is None + time.sleep(1) + + permanent_query = permanent[0] + result = permanent[1] + statement = permanent_query.format(table=exclusive_table) + if transaction == "TxCommit": + query = f""" + BEGIN TRANSACTION; + {statement} + COMMIT; + """ + elif transaction == "TxRollback": + query = f""" + BEGIN TRANSACTION; + {statement} + ROLLBACK; + """ + result = 50 + elif transaction == "TxNotFinished": + query = f""" + BEGIN TRANSACTION; + {statement} + """ + result = 50 + else: + query = statement + + node.query(query) + + node.restart_clickhouse(kill=True) + + assert result == int( + node.query( + f""" + SELECT count() FROM {exclusive_table}; + """ + ) + ) diff --git a/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml b/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml index 6f83a570ccc..a51009542a3 100644 --- a/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml +++ b/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml @@ -1,3 +1,4 @@ 2000000000 + false \ No newline at end of file diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index a498320ed5b..791ae03f9f6 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -1,8 +1,14 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/43541 + +pytestmark = pytest.mark.skip + import logging import os import time -import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV diff --git a/tests/integration/test_keeper_four_word_command/configs/enable_keeper1.xml b/tests/integration/test_keeper_four_word_command/configs/enable_keeper1.xml index 095bb8a9530..a686c96e426 100644 --- a/tests/integration/test_keeper_four_word_command/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_four_word_command/configs/enable_keeper1.xml @@ -33,7 +33,7 @@ 3 node3 9234 - false + true true 1 diff --git a/tests/integration/test_keeper_four_word_command/configs/enable_keeper2.xml b/tests/integration/test_keeper_four_word_command/configs/enable_keeper2.xml index 33ca15c227d..9818d32a74a 100644 --- a/tests/integration/test_keeper_four_word_command/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_four_word_command/configs/enable_keeper2.xml @@ -33,7 +33,7 @@ 3 node3 9234 - false + true true 1 diff --git a/tests/integration/test_keeper_four_word_command/configs/enable_keeper3.xml b/tests/integration/test_keeper_four_word_command/configs/enable_keeper3.xml index 2a3f0b3c279..5a883fac3f6 100644 --- a/tests/integration/test_keeper_four_word_command/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_four_word_command/configs/enable_keeper3.xml @@ -33,7 +33,7 @@ 3 node3 
9234 - false + true true 1 diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index c425c18158b..bc6e227e861 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -148,10 +148,11 @@ def test_cmd_mntr(started_cluster): wait_nodes() clear_znodes() + leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) # reset stat first - reset_node_stats(node1) + reset_node_stats(leader) - zk = get_fake_zk(node1.name, timeout=30.0) + zk = get_fake_zk(leader.name, timeout=30.0) do_some_action( zk, create_cnt=10, @@ -162,7 +163,7 @@ def test_cmd_mntr(started_cluster): delete_cnt=2, ) - data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, leader, cmd="mntr") # print(data.decode()) reader = csv.reader(data.split("\n"), delimiter="\t") @@ -307,12 +308,13 @@ def test_cmd_srvr(started_cluster): wait_nodes() clear_znodes() - reset_node_stats(node1) + leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) + reset_node_stats(leader) - zk = get_fake_zk(node1.name, timeout=30.0) + zk = get_fake_zk(leader.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr") + data = keeper_utils.send_4lw_cmd(cluster, leader, cmd="srvr") print("srvr output -------------------------------------") print(data) @@ -329,7 +331,7 @@ def test_cmd_srvr(started_cluster): assert result["Received"] == "10" assert result["Sent"] == "10" assert int(result["Connections"]) == 1 - assert int(result["Zxid"]) > 14 + assert int(result["Zxid"]) > 10 assert result["Mode"] == "leader" assert result["Node count"] == "13" @@ -342,13 +344,15 @@ def test_cmd_stat(started_cluster): try: wait_nodes() clear_znodes() - reset_node_stats(node1) - reset_conn_stats(node1) - zk = get_fake_zk(node1.name, timeout=30.0) + leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) + reset_node_stats(leader) + reset_conn_stats(leader) + + zk = get_fake_zk(leader.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat") + data = keeper_utils.send_4lw_cmd(cluster, leader, cmd="stat") print("stat output -------------------------------------") print(data) @@ -604,6 +608,10 @@ def test_cmd_csnp(started_cluster): wait_nodes() zk = get_fake_zk(node1.name, timeout=30.0) data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="csnp") + + print("csnp output -------------------------------------") + print(data) + try: int(data) assert True @@ -623,7 +631,10 @@ def test_cmd_lgif(started_cluster): do_some_action(zk, create_cnt=100) data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="lgif") + + print("lgif output -------------------------------------") print(data) + reader = csv.reader(data.split("\n"), delimiter="\t") result = {} @@ -641,3 +652,28 @@ def test_cmd_lgif(started_cluster): assert int(result["last_snapshot_idx"]) >= 1 finally: destroy_zk_client(zk) + + +def test_cmd_rqld(started_cluster): + wait_nodes() + # node2 can not be leader + for node in [node1, node3]: + data = keeper_utils.send_4lw_cmd(cluster, node, cmd="rqld") + assert data == "Sent leadership request to leader." 
+ + print("rqld output -------------------------------------") + print(data) + + if not keeper_utils.is_leader(cluster, node): + # pull wait to become leader + retry = 0 + # TODO not a restrict way + while not keeper_utils.is_leader(cluster, node) and retry < 30: + time.sleep(1) + retry += 1 + if retry == 30: + print( + node.name + + " does not become leader after 30s, maybe there is something wrong." + ) + assert keeper_utils.is_leader(cluster, node) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 8f515077e8f..71f6343101a 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -5,7 +5,7 @@ import random from itertools import count from sys import stdout -from multiprocessing import Pool +from multiprocessing.dummy import Pool from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry, assert_logs_contain diff --git a/tests/integration/test_log_family_hdfs/configs/storage_conf.xml b/tests/integration/test_log_family_hdfs/configs/storage_conf.xml index 82cea6730ff..74270320508 100644 --- a/tests/integration/test_log_family_hdfs/configs/storage_conf.xml +++ b/tests/integration/test_log_family_hdfs/configs/storage_conf.xml @@ -4,6 +4,8 @@ hdfs hdfs://hdfs1:9000/clickhouse/ + + true diff --git a/tests/integration/test_mask_sensitive_info/__init__.py b/tests/integration/test_mask_sensitive_info/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_mask_sensitive_info_in_logs/test.py b/tests/integration/test_mask_sensitive_info/test.py similarity index 88% rename from tests/integration/test_mask_sensitive_info_in_logs/test.py rename to tests/integration/test_mask_sensitive_info/test.py index 48f11fbf7a1..f546c559f66 100644 --- a/tests/integration/test_mask_sensitive_info_in_logs/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -1,6 +1,7 @@ import pytest import random, string from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) node = cluster.add_instance("node", with_zookeeper=True) @@ -110,6 +111,22 @@ def test_create_table(): for i, table_engine in enumerate(table_engines): node.query(f"CREATE TABLE table{i} (x int) ENGINE = {table_engine}") + assert ( + node.query("SHOW CREATE TABLE table0") + == "CREATE TABLE default.table0\\n(\\n `x` Int32\\n)\\nENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')\n" + ) + + assert node.query( + "SELECT create_table_query, engine_full FROM system.tables WHERE name = 'table0'" + ) == TSV( + [ + [ + "CREATE TABLE default.table0 (`x` Int32) ENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')", + "MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')", + ], + ] + ) + check_logs( must_contain=[ "CREATE TABLE table0 (`x` int) ENGINE = MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", @@ -189,6 +206,22 @@ def test_table_functions(): for i, table_function in enumerate(table_functions): node.query(f"CREATE TABLE tablefunc{i} (x int) AS {table_function}") + assert ( + node.query("SHOW CREATE TABLE tablefunc0") + == "CREATE TABLE default.tablefunc0\\n(\\n `x` Int32\\n) AS mysql(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')\n" + ) + + assert node.query( + "SELECT create_table_query, 
engine_full FROM system.tables WHERE name = 'tablefunc0'" + ) == TSV( + [ + [ + "CREATE TABLE default.tablefunc0 (`x` Int32) AS mysql(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')", + "", + ], + ] + ) + check_logs( must_contain=[ "CREATE TABLE tablefunc0 (`x` int) AS mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", @@ -268,6 +301,16 @@ def test_create_dictionary(): f"LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())" ) + assert ( + node.query("SHOW CREATE TABLE dict1") + == "CREATE DICTIONARY default.dict1\\n(\\n `n` int DEFAULT 0,\\n `m` int DEFAULT 1\\n)\\nPRIMARY KEY n\\nSOURCE(CLICKHOUSE(HOST \\'localhost\\' PORT 9000 USER \\'user1\\' TABLE \\'test\\' PASSWORD \\'[HIDDEN]\\' DB \\'default\\'))\\nLIFETIME(MIN 0 MAX 10)\\nLAYOUT(FLAT())\n" + ) + + assert ( + node.query("SELECT create_table_query FROM system.tables WHERE name = 'dict1'") + == "CREATE DICTIONARY default.dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST \\'localhost\\' PORT 9000 USER \\'user1\\' TABLE \\'test\\' PASSWORD \\'[HIDDEN]\\' DB \\'default\\')) LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())\n" + ) + check_logs( must_contain=[ "CREATE DICTIONARY dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n " diff --git a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py index bed7772a3dd..5b75b0dfc38 100644 --- a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py @@ -2184,3 +2184,44 @@ def savepoint(clickhouse_node, mysql_node, mysql_host): mysql_node.query(f"INSERT INTO {db}.t1 VALUES (2)") mysql_node.query("ROLLBACK TO savepoint_1") mysql_node.query("COMMIT") + + +def dropddl(clickhouse_node, mysql_node, mysql_host): + db = "dropddl" + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + mysql_node.query(f"CREATE TABLE {db}.t1 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE TABLE {db}.t2 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE TABLE {db}.t3 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE TABLE {db}.t4 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE VIEW {db}.v1 AS SELECT * FROM {db}.t1") + mysql_node.query(f"INSERT INTO {db}.t1(a, b) VALUES(1, 1)") + + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializeMySQL('{mysql_host}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"SELECT count() FROM system.tables where database = '{db}' FORMAT TSV", + "4\n", + ) + check_query(clickhouse_node, f"SELECT * FROM {db}.t1 FORMAT TSV", "1\t1\n") + mysql_node.query(f"DROP EVENT IF EXISTS {db}.event_name") + mysql_node.query(f"DROP VIEW IF EXISTS {db}.view_name") + mysql_node.query(f"DROP FUNCTION IF EXISTS {db}.function_name") + mysql_node.query(f"DROP TRIGGER IF EXISTS {db}.trigger_name") + mysql_node.query(f"DROP INDEX `PRIMARY` ON {db}.t2") + mysql_node.query(f"DROP TABLE {db}.t3") + mysql_node.query(f"DROP TABLE if EXISTS {db}.t3,{db}.t4") + mysql_node.query(f"TRUNCATE TABLE {db}.t1") + mysql_node.query(f"INSERT INTO {db}.t2(a, b) VALUES(1, 1)") + check_query(clickhouse_node, f"SELECT * FROM {db}.t2 FORMAT TSV", "1\t1\n") + check_query(clickhouse_node, f"SELECT count() FROM {db}.t1 FORMAT TSV", "0\n") + check_query( + clickhouse_node, + f"SELECT name FROM 
system.tables where database = '{db}' FORMAT TSV", + "t1\nt2\n", + ) + mysql_node.query(f"DROP DATABASE {db}") + clickhouse_node.query(f"DROP DATABASE {db}") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 0e33c01a6c9..a22d73061ae 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -516,3 +516,10 @@ def test_savepoint_query( ): materialize_with_ddl.savepoint(clickhouse_node, started_mysql_8_0, "mysql80") materialize_with_ddl.savepoint(clickhouse_node, started_mysql_5_7, "mysql57") + + +def test_materialized_database_mysql_drop_ddl( + started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node +): + materialize_with_ddl.dropddl(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.dropddl(clickhouse_node, started_mysql_5_7, "mysql57") diff --git a/tests/integration/test_merge_tree_empty_parts/test.py b/tests/integration/test_merge_tree_empty_parts/test.py index 57bf49e6803..0f611408a67 100644 --- a/tests/integration/test_merge_tree_empty_parts/test.py +++ b/tests/integration/test_merge_tree_empty_parts/test.py @@ -24,8 +24,10 @@ def started_cluster(): def test_empty_parts_alter_delete(started_cluster): node1.query( - "CREATE TABLE empty_parts_delete (d Date, key UInt64, value String) \ - ENGINE = ReplicatedMergeTree('/clickhouse/tables/empty_parts_delete', 'r1') PARTITION BY toYYYYMM(d) ORDER BY key" + "CREATE TABLE empty_parts_delete (d Date, key UInt64, value String) " + "ENGINE = ReplicatedMergeTree('/clickhouse/tables/empty_parts_delete', 'r1') " + "PARTITION BY toYYYYMM(d) ORDER BY key " + "SETTINGS old_parts_lifetime = 1" ) node1.query("INSERT INTO empty_parts_delete VALUES (toDate('2020-10-10'), 1, 'a')") @@ -43,8 +45,10 @@ def test_empty_parts_alter_delete(started_cluster): def test_empty_parts_summing(started_cluster): node1.query( - "CREATE TABLE empty_parts_summing (d Date, key UInt64, value Int64) \ - ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/empty_parts_summing', 'r1') PARTITION BY toYYYYMM(d) ORDER BY key" + "CREATE TABLE empty_parts_summing (d Date, key UInt64, value Int64) " + "ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/empty_parts_summing', 'r1') " + "PARTITION BY toYYYYMM(d) ORDER BY key " + "SETTINGS old_parts_lifetime = 1" ) node1.query("INSERT INTO empty_parts_summing VALUES (toDate('2020-10-10'), 1, 1)") diff --git a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml index 7f816724c43..890c396ed95 100644 --- a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml @@ -4,6 +4,8 @@ hdfs hdfs://hdfs1:9000/clickhouse/ + + true local diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 132e1027586..9edb71ec15a 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -5,6 +5,8 @@ import os import pytest from helpers.cluster import ClickHouseCluster from helpers.utility import generate_values +from helpers.wait_for_helpers import wait_for_delete_inactive_parts +from helpers.wait_for_helpers import wait_for_delete_empty_parts from pyhdfs import HdfsClient @@ -209,6 +211,8 @@ def test_attach_detach_partition(cluster): node.query("ALTER 
TABLE hdfs_test DETACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" + wait_for_delete_inactive_parts(node, "hdfs_test") + wait_for_delete_empty_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 @@ -221,6 +225,8 @@ def test_attach_detach_partition(cluster): node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" + wait_for_delete_inactive_parts(node, "hdfs_test") + wait_for_delete_empty_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE @@ -231,6 +237,8 @@ def test_attach_detach_partition(cluster): settings={"allow_drop_detached": 1}, ) assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)" + wait_for_delete_inactive_parts(node, "hdfs_test") + wait_for_delete_empty_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD @@ -297,6 +305,8 @@ def test_table_manipulations(cluster): node.query("TRUNCATE TABLE hdfs_test") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)" + wait_for_delete_inactive_parts(node, "hdfs_test") + wait_for_delete_empty_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index b2e93db2606..002bc8ec9d7 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -5,6 +5,9 @@ import os import pytest from helpers.cluster import ClickHouseCluster from helpers.utility import generate_values, replace_config, SafeThread +from helpers.wait_for_helpers import wait_for_delete_inactive_parts +from helpers.wait_for_helpers import wait_for_delete_empty_parts + SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -320,6 +323,8 @@ def test_attach_detach_partition(cluster, node_name): ) node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-03'") + wait_for_delete_inactive_parts(node, "s3_test") + wait_for_delete_empty_parts(node, "s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) @@ -334,13 +339,22 @@ def test_attach_detach_partition(cluster, node_name): ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") + wait_for_delete_inactive_parts(node, "s3_test") + wait_for_delete_empty_parts(node, "s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1 ) node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-04'") + wait_for_delete_inactive_parts(node, "s3_test") + wait_for_delete_empty_parts(node, "s3_test") + assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" + assert ( + len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1 + ) node.query( "ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1}, @@ -348,7 +362,7 @@ def test_attach_detach_partition(cluster, node_name): assert 
node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 0 ) @@ -417,6 +431,8 @@ def test_table_manipulations(cluster, node_name): ) node.query("TRUNCATE TABLE s3_test") + wait_for_delete_inactive_parts(node, "s3_test") + wait_for_delete_empty_parts(node, "s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) @@ -530,6 +546,8 @@ def test_freeze_unfreeze(cluster, node_name): node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup2'") node.query("TRUNCATE TABLE s3_test") + wait_for_delete_inactive_parts(node, "s3_test") + wait_for_delete_empty_parts(node, "s3_test") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 @@ -568,6 +586,8 @@ def test_freeze_system_unfreeze(cluster, node_name): node.query("ALTER TABLE s3_test_removed FREEZE WITH NAME 'backup3'") node.query("TRUNCATE TABLE s3_test") + wait_for_delete_inactive_parts(node, "s3_test") + wait_for_delete_empty_parts(node, "s3_test") node.query("DROP TABLE s3_test_removed NO DELAY") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) @@ -763,7 +783,7 @@ def test_cache_setting_compatibility(cluster, node_name): node.query("DROP TABLE IF EXISTS s3_test NO DELAY") node.query( - "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_r';" + "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_r', compress_marks=false, compress_primary_key=false;" ) node.query( "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 500" diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index 0652c31951d..d29bb1e34ac 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -6,6 +6,8 @@ import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.wait_for_helpers import wait_for_delete_empty_parts +from helpers.wait_for_helpers import wait_for_delete_inactive_parts SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -103,8 +105,8 @@ def create_table( ORDER BY (dt, id) SETTINGS storage_policy='s3', - old_parts_lifetime=600, - index_granularity=512 + index_granularity=512, + old_parts_lifetime=1 """.format( create="ATTACH" if attach else "CREATE", table_name=table_name, @@ -142,6 +144,7 @@ def create_restore_file(node, revision=None, bucket=None, path=None, detached=No node.exec_in_container( ["bash", "-c", "mkdir -p /var/lib/clickhouse/disks/s3/"], user="root" ) + node.exec_in_container( ["bash", "-c", "touch /var/lib/clickhouse/disks/s3/restore"], user="root" ) @@ -270,6 +273,7 @@ def test_restore_another_bucket_path(cluster, db_atomic): # To ensure parts have merged node.query("OPTIMIZE TABLE s3.test") + wait_for_delete_inactive_parts(node, "s3.test", retry_count=120) assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format( 4096 * 4 @@ -336,6 +340,9 @@ def test_restore_different_revisions(cluster, db_atomic): # To ensure parts have merged node.query("OPTIMIZE TABLE s3.test") + wait_for_delete_inactive_parts(node, 
"s3.test", retry_count=120) + + assert node.query("SELECT count(*) from system.parts where table = 'test'") == "3\n" node.query("ALTER TABLE s3.test FREEZE") revision3 = get_revision_counter(node, 3) @@ -344,7 +351,7 @@ def test_restore_different_revisions(cluster, db_atomic): 4096 * 4 ) assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) - assert node.query("SELECT count(*) from system.parts where table = 'test'") == "5\n" + assert node.query("SELECT count(*) from system.parts where table = 'test'") == "3\n" node_another_bucket = cluster.instances["node_another_bucket"] @@ -403,7 +410,7 @@ def test_restore_different_revisions(cluster, db_atomic): node_another_bucket.query( "SELECT count(*) from system.parts where table = 'test'" ) - == "5\n" + == "3\n" ) @@ -593,6 +600,8 @@ def test_restore_to_detached(cluster, replicated, db_atomic): # Detach some partition. node.query("ALTER TABLE s3.test DETACH PARTITION '2020-01-07'") + wait_for_delete_empty_parts(node, "s3.test", retry_count=120) + wait_for_delete_inactive_parts(node, "s3.test", retry_count=120) node.query("ALTER TABLE s3.test FREEZE") revision = get_revision_counter(node, 1) @@ -623,10 +632,10 @@ def test_restore_to_detached(cluster, replicated, db_atomic): node_another_bucket.query("ALTER TABLE s3.test ATTACH PARTITION '2020-01-04'") node_another_bucket.query("ALTER TABLE s3.test ATTACH PARTITION '2020-01-05'") node_another_bucket.query("ALTER TABLE s3.test ATTACH PARTITION '2020-01-06'") - assert node_another_bucket.query( "SELECT count(*) FROM s3.test FORMAT Values" ) == "({})".format(4096 * 4) + assert node_another_bucket.query( "SELECT sum(id) FROM s3.test FORMAT Values" ) == "({})".format(0) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index d7117e2546a..9b7bad2b256 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -1244,10 +1244,16 @@ def test_concurrent_alter_move_and_drop(start_cluster, name, engine): def alter_drop(num): for i in range(num): partition = random.choice([201903, 201904]) - drach = random.choice(["drop", "detach"]) - node1.query( - "ALTER TABLE {} {} PARTITION {}".format(name, drach, partition) - ) + op = random.choice(["drop", "detach"]) + try: + node1.query( + "ALTER TABLE {} {} PARTITION {}".format(name, op, partition) + ) + except QueryRuntimeException as e: + if "Code: 650" in e.stderr: + pass + else: + raise e insert(100) p = Pool(15) @@ -1655,7 +1661,7 @@ def test_freeze(start_cluster): ) ENGINE = MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(d) - SETTINGS storage_policy='small_jbod_with_external' + SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false """ ) diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index c53bc5a9d0d..6bd224851e7 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -3,6 +3,8 @@ import logging from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV from helpers.test_tools import assert_eq_with_retry +from helpers.wait_for_helpers import wait_for_delete_inactive_parts +from helpers.wait_for_helpers import wait_for_delete_empty_parts cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( @@ -36,7 +38,7 @@ def partition_table_simple(started_cluster): q( "CREATE TABLE test.partition_simple (date MATERIALIZED toDate(0), x UInt64, sample_key 
MATERIALIZED intHash64(x)) " "ENGINE=MergeTree PARTITION BY date SAMPLE BY sample_key ORDER BY (date,x,sample_key) " - "SETTINGS index_granularity=8192, index_granularity_bytes=0" + "SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false" ) q("INSERT INTO test.partition_simple ( x ) VALUES ( now() )") q("INSERT INTO test.partition_simple ( x ) VALUES ( now()+1 )") @@ -115,7 +117,7 @@ def partition_table_complex(started_cluster): q("DROP TABLE IF EXISTS test.partition_complex") q( "CREATE TABLE test.partition_complex (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) " - "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0" + "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false" ) q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(31), 1)") q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(1), 2)") @@ -153,7 +155,7 @@ def test_partition_complex(partition_table_complex): def cannot_attach_active_part_table(started_cluster): q("DROP TABLE IF EXISTS test.attach_active") q( - "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n" + "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" ) q("INSERT INTO test.attach_active SELECT number FROM system.numbers LIMIT 16") @@ -181,7 +183,7 @@ def attach_check_all_parts_table(started_cluster): q("SYSTEM STOP MERGES") q("DROP TABLE IF EXISTS test.attach_partition") q( - "CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n" + "CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" ) q( "INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8" @@ -199,6 +201,9 @@ def attach_check_all_parts_table(started_cluster): def test_attach_check_all_parts(attach_check_all_parts_table): q("ALTER TABLE test.attach_partition DETACH PARTITION 0") + wait_for_delete_inactive_parts(instance, "test.attach_partition") + wait_for_delete_empty_parts(instance, "test.attach_partition") + path_to_detached = path_to_data + "data/test/attach_partition/detached/" instance.exec_in_container(["mkdir", "{}".format(path_to_detached + "0_5_5_0")]) instance.exec_in_container( @@ -226,7 +231,8 @@ def test_attach_check_all_parts(attach_check_all_parts_table): ) parts = q( - "SElECT name FROM system.parts WHERE table='attach_partition' AND database='test' ORDER BY name" + "SElECT name FROM system.parts " + "WHERE table='attach_partition' AND database='test' AND active ORDER BY name" ) assert TSV(parts) == TSV("1_2_2_0\n1_4_4_0") detached = q( @@ -259,7 +265,7 @@ def drop_detached_parts_table(started_cluster): q("SYSTEM STOP MERGES") q("DROP TABLE IF EXISTS test.drop_detached") q( - "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n" + "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" ) q( "INSERT INTO test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8" @@ -329,9 +335,15 @@ def test_drop_detached_parts(drop_detached_parts_table): def 
test_system_detached_parts(drop_detached_parts_table): - q("create table sdp_0 (n int, x int) engine=MergeTree order by n") - q("create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x") - q("create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x") + q( + "create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false" + ) + q( + "create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false" + ) + q( + "create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false" + ) q( "create table sdp_3 (n int, x Enum('broken' = 0, 'all' = 1)) engine=MergeTree order by n partition by x" ) @@ -449,15 +461,20 @@ def test_system_detached_parts(drop_detached_parts_table): def test_detached_part_dir_exists(started_cluster): - q("create table detached_part_dir_exists (n int) engine=MergeTree order by n") + q( + "create table detached_part_dir_exists (n int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false" + ) q("insert into detached_part_dir_exists select 1") # will create all_1_1_0 q( "alter table detached_part_dir_exists detach partition id 'all'" - ) # will move all_1_1_0 to detached/all_1_1_0 + ) # will move all_1_1_0 to detached/all_1_1_0 and create all_1_1_1 + + wait_for_delete_empty_parts(instance, "detached_part_dir_exists") + q("detach table detached_part_dir_exists") q("attach table detached_part_dir_exists") - q("insert into detached_part_dir_exists select 1") # will create all_1_1_0 q("insert into detached_part_dir_exists select 1") # will create all_2_2_0 + q("insert into detached_part_dir_exists select 1") # will create all_3_3_0 instance.exec_in_container( [ "bash", @@ -488,7 +505,7 @@ def test_detached_part_dir_exists(started_cluster): def test_make_clone_in_detached(started_cluster): q( - "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n" + "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false" ) path = path_to_data + "data/default/clone_in_detached/" diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 32b5e531fa8..361b4855747 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -728,7 +728,7 @@ def test_polymorphic_parts_index(start_cluster): """ CREATE TABLE test_index.index_compact(a UInt32, s String) ENGINE = MergeTree ORDER BY a - SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100""" + SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false""" ) node1.query( diff --git a/tests/integration/test_read_only_table/test.py b/tests/integration/test_read_only_table/test.py index 28abbf6601e..914c6a99508 100644 --- a/tests/integration/test_read_only_table/test.py +++ b/tests/integration/test_read_only_table/test.py @@ -84,6 +84,8 @@ def test_restart_zookeeper(start_cluster): time.sleep(5) for table_id in range(NUM_TABLES): - node1.query( - f"INSERT INTO test_table_{table_id} VALUES (6), (7), (8), (9), (10);" + node1.query_with_retry( + sql=f"INSERT INTO 
test_table_{table_id} VALUES (6), (7), (8), (9), (10);", + retry_count=10, + sleep_time=1, ) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml index 1b1ead2d7cb..cb444c728c9 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml @@ -4,14 +4,20 @@ hdfs hdfs://hdfs1:9000/clickhouse1/ + + true hdfs hdfs://hdfs1:9000/clickhouse1/ + + true hdfs hdfs://hdfs1:9000/clickhouse2/ + + true diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index a63244df814..3f9da071281 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -1,7 +1,6 @@ import logging import os import json - import helpers.client import pytest from helpers.cluster import ClickHouseCluster @@ -143,3 +142,25 @@ def test_select_query(started_cluster): ), ).splitlines() assert len(result) > 0 + + +def test_describe_query(started_cluster): + instance = started_cluster.instances["main_server"] + bucket = started_cluster.minio_bucket + result = instance.query( + f"DESCRIBE deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/test_table/', 'minio', 'minio123') FORMAT TSV", + ) + + assert result == TSV( + [ + ["begin_lat", "Nullable(Float64)"], + ["begin_lon", "Nullable(Float64)"], + ["driver", "Nullable(String)"], + ["end_lat", "Nullable(Float64)"], + ["end_lon", "Nullable(Float64)"], + ["fare", "Nullable(Float64)"], + ["rider", "Nullable(String)"], + ["ts", "Nullable(Int64)"], + ["uuid", "Nullable(String)"], + ] + ) diff --git a/tests/integration/test_storage_hdfs/configs/cluster.xml b/tests/integration/test_storage_hdfs/configs/cluster.xml new file mode 100644 index 00000000000..9efe0ebf273 --- /dev/null +++ b/tests/integration/test_storage_hdfs/configs/cluster.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node1 + 19000 + + + + + diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 34243e4b58d..d4752d6cf2e 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -9,7 +9,11 @@ from pyhdfs import HdfsClient cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", - main_configs=["configs/macro.xml", "configs/schema_cache.xml"], + main_configs=[ + "configs/macro.xml", + "configs/schema_cache.xml", + "configs/cluster.xml", + ], with_hdfs=True, ) @@ -783,6 +787,32 @@ def test_schema_inference_cache(started_cluster): check_cache_misses(node1, files, 4) +def test_hdfsCluster_skip_unavailable_shards(started_cluster): + hdfs_api = started_cluster.hdfs_api + node = started_cluster.instances["node1"] + data = "1\tSerialize\t555.222\n2\tData\t777.333\n" + hdfs_api.write_data("/skip_unavailable_shards", data) + + assert ( + node1.query( + "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/skip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64') settings skip_unavailable_shards = 1" + ) + == data + ) + + +def test_hdfsCluster_unskip_unavailable_shards(started_cluster): + hdfs_api = started_cluster.hdfs_api + node = started_cluster.instances["node1"] + data = "1\tSerialize\t555.222\n2\tData\t777.333\n" + 
hdfs_api.write_data("/unskip_unavailable_shards", data) + error = node.query_and_get_error( + "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/unskip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" + ) + + assert "NETWORK_ERROR" in error + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_hudi/test.py b/tests/integration/test_storage_hudi/test.py index dd870aae42e..3328f859406 100644 --- a/tests/integration/test_storage_hudi/test.py +++ b/tests/integration/test_storage_hudi/test.py @@ -161,7 +161,7 @@ def test_select_query(started_cluster): result = run_query(instance, distinct_select_query) result_table_function = run_query( instance, - distinct_select_query.format( + distinct_select_table_function_query.format( ip=started_cluster.minio_ip, port=started_cluster.minio_port, bucket=bucket ), ) @@ -173,3 +173,31 @@ def test_select_query(started_cluster): assert TSV(result) == TSV(expected) assert TSV(result_table_function) == TSV(expected) + + +def test_describe_query(started_cluster): + instance = started_cluster.instances["main_server"] + bucket = started_cluster.minio_bucket + result = instance.query( + f"DESCRIBE hudi('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/test_table/', 'minio', 'minio123') FORMAT TSV", + ) + + assert result == TSV( + [ + ["_hoodie_commit_time", "Nullable(String)"], + ["_hoodie_commit_seqno", "Nullable(String)"], + ["_hoodie_record_key", "Nullable(String)"], + ["_hoodie_partition_path", "Nullable(String)"], + ["_hoodie_file_name", "Nullable(String)"], + ["begin_lat", "Nullable(Float64)"], + ["begin_lon", "Nullable(Float64)"], + ["driver", "Nullable(String)"], + ["end_lat", "Nullable(Float64)"], + ["end_lon", "Nullable(Float64)"], + ["fare", "Nullable(Float64)"], + ["partitionpath", "Nullable(String)"], + ["rider", "Nullable(String)"], + ["ts", "Nullable(Int64)"], + ["uuid", "Nullable(String)"], + ] + ) diff --git a/tests/integration/test_storage_policies/__init__.py b/tests/integration/test_storage_policies/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_policies/configs/disk2_only.xml b/tests/integration/test_storage_policies/configs/disk2_only.xml new file mode 100644 index 00000000000..3dccf59bf44 --- /dev/null +++ b/tests/integration/test_storage_policies/configs/disk2_only.xml @@ -0,0 +1,18 @@ + + + + + /var/lib/clickhouse2/ + + + + + + + disk2 + + + + + + diff --git a/tests/integration/test_storage_policies/configs/disks.xml b/tests/integration/test_storage_policies/configs/disks.xml new file mode 100644 index 00000000000..3331fee4e4f --- /dev/null +++ b/tests/integration/test_storage_policies/configs/disks.xml @@ -0,0 +1,24 @@ + + + + + /var/lib/clickhouse1/ + + + /var/lib/clickhouse2/ + + + + + + + disk1 + + + disk2 + + + + + + diff --git a/tests/integration/test_storage_policies/test.py b/tests/integration/test_storage_policies/test.py new file mode 100644 index 00000000000..389146b2171 --- /dev/null +++ b/tests/integration/test_storage_policies/test.py @@ -0,0 +1,40 @@ +import os + +import pytest +from helpers.test_tools import TSV +from helpers.cluster import ClickHouseCluster + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs") + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/disks.xml"], stay_alive=True) + + 
+@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_storage_policy_configuration_change(started_cluster): + node.query( + "CREATE TABLE a (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS storage_policy = 'test_policy'" + ) + + node.stop_clickhouse() + node.copy_file_to_container( + os.path.join(CONFIG_DIR, "disk2_only.xml"), + "/etc/clickhouse-server/config.d/disks.xml", + ) + node.start_clickhouse() + + node.stop_clickhouse() + node.copy_file_to_container( + os.path.join(CONFIG_DIR, "disks.xml"), + "/etc/clickhouse-server/config.d/disks.xml", + ) + node.start_clickhouse() diff --git a/tests/integration/test_transactions/test.py b/tests/integration/test_transactions/test.py index daa4c287982..7902d168707 100644 --- a/tests/integration/test_transactions/test.py +++ b/tests/integration/test_transactions/test.py @@ -104,6 +104,8 @@ def test_rollback_unfinished_on_restart1(start_cluster): "0_4_4_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "0_8_8_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" + "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "1_1_1_1_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_5_5_0\t1\ttid6\tcsn_6\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" @@ -190,5 +192,6 @@ def test_rollback_unfinished_on_restart2(start_cluster): "0_4_4_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "0_5_5_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" + "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" ) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index cacd9ef0c78..aa4a09f1269 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -4,6 +4,8 @@ import helpers.client as client import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV, exec_query_with_retry +from helpers.wait_for_helpers import wait_for_delete_inactive_parts +from helpers.wait_for_helpers import wait_for_delete_empty_parts cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance("node1", with_zookeeper=True) @@ -420,7 +422,8 @@ def test_ttl_empty_parts(started_cluster): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_empty_parts', '{replica}') ORDER BY id SETTINGS max_bytes_to_merge_at_min_space_in_pool = 1, max_bytes_to_merge_at_max_space_in_pool = 1, - cleanup_delay_period = 1, cleanup_delay_period_random_add = 0 + cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, old_parts_lifetime = 1 + """.format( replica=node.name ) @@ -445,7 +448,10 @@ def test_ttl_empty_parts(started_cluster): assert node1.query("SELECT count() FROM test_ttl_empty_parts") == "3000\n" - time.sleep(3) # Wait for cleanup thread + # Wait for cleanup thread + wait_for_delete_inactive_parts(node1, "test_ttl_empty_parts") + wait_for_delete_empty_parts(node1, "test_ttl_empty_parts") + assert ( node1.query( "SELECT name FROM system.parts WHERE table = 'test_ttl_empty_parts' AND active ORDER BY name" diff --git 
a/tests/performance/general_purpose_hashes.xml b/tests/performance/general_purpose_hashes.xml index f34554360cf..ba4e8f93859 100644 --- a/tests/performance/general_purpose_hashes.xml +++ b/tests/performance/general_purpose_hashes.xml @@ -15,6 +15,7 @@ hiveHash xxHash32 xxHash64 + xxh3 CRC32 diff --git a/tests/performance/low_cardinality_from_json.xml b/tests/performance/low_cardinality_from_json.xml new file mode 100644 index 00000000000..ac6542ac503 --- /dev/null +++ b/tests/performance/low_cardinality_from_json.xml @@ -0,0 +1,73 @@ + + + + + string_json + + '{"a": "hi", "b": "hello", "c": "hola", "d": "see you, bye, bye"}' + + + + int_json + + '{"a": 11, "b": 2222, "c": 33333333, "d": 4444444444444444}' + + + + uuid_json + + '{"a": "2d49dc6e-ddce-4cd0-afb8-790956df54c4", "b": "2d49dc6e-ddce-4cd0-afb8-790956df54c3", "c": "2d49dc6e-ddce-4cd0-afb8-790956df54c1", "d": "2d49dc6e-ddce-4cd0-afb8-790956df54c1"}' + + + + low_cardinality_tuple_string + + 'Tuple(a LowCardinality(String), b LowCardinality(String), c LowCardinality(String), d LowCardinality(String) )' + + + + low_cardinality_tuple_fixed_string + + 'Tuple(a LowCardinality(FixedString(20)), b LowCardinality(FixedString(20)), c LowCardinality(FixedString(20)), d LowCardinality(FixedString(20)) )' + + + + low_cardinality_tuple_int8 + + 'Tuple(a LowCardinality(Int8), b LowCardinality(Int8), c LowCardinality(Int8), d LowCardinality(Int8) )' + + + + low_cardinality_tuple_int16 + + 'Tuple(a LowCardinality(Int16), b LowCardinality(Int16), c LowCardinality(Int16), d LowCardinality(Int16) )' + + + + low_cardinality_tuple_int32 + + 'Tuple(a LowCardinality(Int32), b LowCardinality(Int32), c LowCardinality(Int32), d LowCardinality(Int32) )' + + + + low_cardinality_tuple_int64 + + 'Tuple(a LowCardinality(Int64), b LowCardinality(Int64), c LowCardinality(Int64), d LowCardinality(Int64) )' + + + + low_cardinality_tuple_uuid + + 'Tuple(a LowCardinality(UUID), b LowCardinality(UUID), c LowCardinality(UUID), d LowCardinality(UUID) )' + + + + + SELECT 'fixed_string_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({string_json}), {low_cardinality_tuple_fixed_string})) FORMAT Null + SELECT 'string_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({string_json}), {low_cardinality_tuple_string})) FORMAT Null + SELECT 'int8_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int8})) FORMAT Null + SELECT 'int16_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int16})) FORMAT Null + SELECT 'int32_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int32})) FORMAT Null + SELECT 'int64_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({int_json}), {low_cardinality_tuple_int64})) FORMAT Null + SELECT 'uuid_json' FROM zeros(500000) WHERE NOT ignore(JSONExtract(materialize({uuid_json}), {low_cardinality_tuple_uuid})) FORMAT Null + \ No newline at end of file diff --git a/tests/performance/memory_bound_merging.xml b/tests/performance/memory_bound_merging.xml new file mode 100644 index 00000000000..3b13400151c --- /dev/null +++ b/tests/performance/memory_bound_merging.xml @@ -0,0 +1,17 @@ + + + 1 + 1 + + + create table t_mbm(a UInt64) engine=MergeTree order by a + + insert into t_mbm select * from numbers_mt(5e6) + optimize table t_mbm final + + select avg(a) from remote('127.0.0.{{1,2}}', default, t_mbm) group by a format Null + + select * from 
remote('127.0.0.{{1,2}}', default, t_mbm) group by a format Null settings allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, use_hedged_requests = 0 + + drop table t_mbm + diff --git a/tests/performance/uniq_without_key.xml b/tests/performance/uniq_without_key.xml new file mode 100644 index 00000000000..4394aef7889 --- /dev/null +++ b/tests/performance/uniq_without_key.xml @@ -0,0 +1,33 @@ + + + + uniq_keys + + 10000 + 50000 + 100000 + 250000 + 500000 + 1000000 + + + + + create table t_{uniq_keys}(a UInt64) engine=MergeTree order by tuple() + + insert into t_{uniq_keys} select number % {uniq_keys} from numbers_mt(5e7) + + SELECT count(distinct a) FROM t_{uniq_keys} GROUP BY a FORMAT Null + SELECT uniqExact(a) FROM t_{uniq_keys} GROUP BY a FORMAT Null + + SELECT count(distinct a) FROM t_{uniq_keys} + SELECT uniqExact(a) FROM t_{uniq_keys} + + SELECT uniqExact(number) from numbers_mt(1e7) + SELECT uniqExact(number) from numbers_mt(5e7) + + SELECT uniqExact(number, number) from numbers_mt(5e6) + SELECT uniqExact(number, number) from numbers_mt(1e7) + + drop table t_{uniq_keys} + diff --git a/tests/queries/0_stateless/00502_sum_map.reference b/tests/queries/0_stateless/00502_sum_map.reference index 31b067a2bc9..b1cd0303004 100644 --- a/tests/queries/0_stateless/00502_sum_map.reference +++ b/tests/queries/0_stateless/00502_sum_map.reference @@ -1,26 +1,70 @@ +-- { echoOn } +DROP TABLE IF EXISTS sum_map; +CREATE TABLE sum_map(date Date, timeslot DateTime, statusMap Nested(status UInt16, requests UInt64)) ENGINE = Log; +INSERT INTO sum_map VALUES ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]); +SELECT * FROM sum_map ORDER BY timeslot, statusMap.status, statusMap.requests; 2000-01-01 2000-01-01 00:00:00 [1,2,3] [10,10,10] 2000-01-01 2000-01-01 00:00:00 [3,4,5] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [4,5,6] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [6,7,8] [10,10,10] +SELECT sumMap(statusMap.status, statusMap.requests) FROM sum_map; ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +SELECT sumMap((statusMap.status, statusMap.requests)) FROM sum_map; ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM sum_map); ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +SELECT timeslot, sumMap(statusMap.status, statusMap.requests) FROM sum_map GROUP BY timeslot ORDER BY timeslot; 2000-01-01 00:00:00 ([1,2,3,4,5],[10,10,20,10,10]) 2000-01-01 00:01:00 ([4,5,6,7,8],[10,10,20,10,10]) +SELECT timeslot, sumMap(statusMap.status, statusMap.requests).1, sumMap(statusMap.status, statusMap.requests).2 FROM sum_map GROUP BY timeslot ORDER BY timeslot; 2000-01-01 00:00:00 [1,2,3,4,5] [10,10,20,10,10] 2000-01-01 00:01:00 [4,5,6,7,8] [10,10,20,10,10] +SELECT sumMapFiltered([1])(statusMap.status, statusMap.requests) FROM sum_map; ([1],[10]) +SELECT sumMapFiltered([1, 4, 8])(statusMap.status, statusMap.requests) FROM sum_map; ([1,4,8],[10,20,10]) +DROP TABLE sum_map; +DROP TABLE IF EXISTS sum_map_overflow; +CREATE TABLE sum_map_overflow(events Array(UInt8), counts Array(UInt8)) ENGINE = Log; +INSERT INTO sum_map_overflow VALUES ([1], [255]), ([1], [2]); +SELECT sumMap(events, counts) FROM sum_map_overflow; ([1],[257]) +SELECT sumMapWithOverflow(events, counts) FROM sum_map_overflow; ([1],[1]) +DROP TABLE 
sum_map_overflow; +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'UInt64') ] as val, [1] as cnt ); ([1],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Float64') ] as val, [1] as cnt ); ([1],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST('a', 'Enum16(\'a\'=1)') ] as val, [1] as cnt ); (['a'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'DateTime(\'Asia/Istanbul\')') ] as val, [1] as cnt ); (['1970-01-01 02:00:01'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Date') ] as val, [1] as cnt ); (['1970-01-02'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST('01234567-89ab-cdef-0123-456789abcdef', 'UUID') ] as val, [1] as cnt ); (['01234567-89ab-cdef-0123-456789abcdef'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1.01, 'Decimal(10,2)') ] as val, [1] as cnt ); ([1.01],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST('a', 'FixedString(1)'), CAST('b', 'FixedString(1)' ) ] as val, [1, 2] as cnt ); (['a','b'],[1,2]) +select sumMap(val, cnt) from ( SELECT [ CAST('abc', 'String'), CAST('ab', 'String'), CAST('a', 'String') ] as val, [1, 2, 3] as cnt ); (['a','ab','abc'],[3,2,1]) +DROP TABLE IF EXISTS sum_map_decimal; +CREATE TABLE sum_map_decimal( + statusMap Nested( + goal_id UInt16, + revenue Decimal32(5) + ) +) ENGINE = Log; +INSERT INTO sum_map_decimal VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; ([1,2,3,4,5,6,7,8],[1,2,6,8,10,12,7,8]) +SELECT sumMapWithOverflow(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; ([1,2,3,4,5,6,7,8],[1,2,6,8,10,12,7,8]) +DROP TABLE sum_map_decimal; +CREATE TABLE sum_map_decimal_nullable (`statusMap` Array(Tuple(goal_id UInt16, revenue Nullable(Decimal(9, 5))))) engine=Log; +INSERT INTO sum_map_decimal_nullable VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal_nullable; +([1,2,3,4,5,6,7,8],[1,2,6,8,10,12,7,8]) +DROP TABLE sum_map_decimal_nullable; diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql index acc87cc5f16..30037d49784 100644 --- a/tests/queries/0_stateless/00502_sum_map.sql +++ b/tests/queries/0_stateless/00502_sum_map.sql @@ -1,5 +1,6 @@ SET send_logs_level = 'fatal'; +-- { echoOn } DROP TABLE IF EXISTS sum_map; CREATE TABLE sum_map(date Date, timeslot DateTime, statusMap Nested(status UInt16, requests UInt64)) ENGINE = Log; @@ -54,3 +55,8 @@ SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; SELECT sumMapWithOverflow(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; DROP TABLE sum_map_decimal; + +CREATE TABLE sum_map_decimal_nullable (`statusMap` Array(Tuple(goal_id UInt16, revenue Nullable(Decimal(9, 5))))) engine=Log; +INSERT INTO sum_map_decimal_nullable VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal_nullable; +DROP TABLE sum_map_decimal_nullable; diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index e1392d299dc..bbc7bedcb4f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ 
b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -12,7 +12,7 @@ CREATE TABLE check_system_tables ORDER BY name1 PARTITION BY name2 SAMPLE BY name1 - SETTINGS min_bytes_for_wide_part = 0; + SETTINGS min_bytes_for_wide_part = 0, compress_marks=false, compress_primary_key=false; SELECT name, partition_key, sorting_key, primary_key, sampling_key, storage_policy, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase() @@ -36,7 +36,8 @@ CREATE TABLE check_system_tables sign Int8 ) ENGINE = VersionedCollapsingMergeTree(sign, version) PARTITION BY date - ORDER BY date; + ORDER BY date + SETTINGS compress_marks=false, compress_primary_key=false; SELECT name, partition_key, sorting_key, primary_key, sampling_key FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase() diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql index 044d60aeafb..ca9bb1b177e 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql @@ -9,12 +9,12 @@ DROP TABLE IF EXISTS default_codec_synthetic; CREATE TABLE delta_codec_synthetic ( id UInt64 Codec(Delta, ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; CREATE TABLE default_codec_synthetic ( id UInt64 Codec(ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; INSERT INTO delta_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; INSERT INTO default_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; @@ -47,12 +47,12 @@ DROP TABLE IF EXISTS default_codec_float; CREATE TABLE delta_codec_float ( id Float64 Codec(Delta, LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; CREATE TABLE default_codec_float ( id Float64 Codec(LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; INSERT INTO delta_codec_float SELECT number FROM numbers(1547510400, 500000) WHERE number % 3 == 0 OR number % 5 == 0 OR number % 7 == 0 OR number % 11 == 0; INSERT INTO default_codec_float SELECT * from delta_codec_float; @@ -85,12 +85,12 @@ DROP TABLE IF EXISTS default_codec_string; CREATE TABLE delta_codec_string ( id Float64 Codec(Delta, LZ4) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; CREATE TABLE default_codec_string ( id Float64 Codec(LZ4) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; INSERT INTO delta_codec_string SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000); INSERT INTO default_codec_string SELECT * 
from delta_codec_string; diff --git a/tests/queries/0_stateless/00824_filesystem.sql b/tests/queries/0_stateless/00824_filesystem.sql index cd4d69a703e..c8ac9179d42 100644 --- a/tests/queries/0_stateless/00824_filesystem.sql +++ b/tests/queries/0_stateless/00824_filesystem.sql @@ -1 +1 @@ -SELECT filesystemCapacity() >= filesystemFree() AND filesystemFree() >= filesystemAvailable() AND filesystemAvailable() >= 0; +SELECT filesystemCapacity() >= filesystemAvailable() AND filesystemAvailable() >= 0 AND filesystemUnreserved() >= 0; diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.reference b/tests/queries/0_stateless/00956_sensitive_data_masking.reference index 86323ec45e8..457ab9118f1 100644 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.reference +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.reference @@ -1,11 +1,14 @@ 1 2 3 +3.1 4 5 5.1 6 7 +7.1 +7.2 8 9 text_log non empty diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index e36031c54be..ccd9bbcf10e 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -37,12 +37,20 @@ rm -f "$tmp_file" >/dev/null 2>&1 echo 3 # failure at before query start $CLICKHOUSE_CLIENT \ - --query="SELECT 'find_me_TOPSECRET=TOPSECRET' FROM non_existing_table FORMAT Null" \ + --query="SELECT 1 FROM system.numbers WHERE credit_card_number='find_me_TOPSECRET=TOPSECRET' FORMAT Null" \ --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > "$tmp_file" grep -F 'find_me_[hidden]' "$tmp_file" >/dev/null || echo 'fail 3a' grep -F 'TOPSECRET' "$tmp_file" && echo 'fail 3b' +echo '3.1' +echo "SELECT 1 FROM system.numbers WHERE credit_card_number='find_me_TOPSECRET=TOPSECRET' FORMAT Null" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- >"$tmp_file" 2>&1 + +grep -F 'find_me_[hidden]' "$tmp_file" >/dev/null || echo 'fail 3.1a' +grep -F 'TOPSECRET' "$tmp_file" && echo 'fail 3.1b' + +#echo "SELECT 1 FROM system.numbers WHERE credit_card_number='find_me_TOPSECRET=TOPSECRET' FORMAT Null" | curl -sSg http://172.17.0.3:8123/ -d @- + rm -f "$tmp_file" >/dev/null 2>&1 echo 4 # failure at the end of query @@ -100,6 +108,21 @@ $CLICKHOUSE_CLIENT \ --server_logs_file=/dev/null \ --query="select * from system.query_log where current_database = currentDatabase() AND event_date >= yesterday() and query like '%TOPSECRET%';" +echo '7.1' +# query_log exceptions +$CLICKHOUSE_CLIENT \ + --server_logs_file=/dev/null \ + --query="select * from system.query_log where current_database = currentDatabase() AND event_date >= yesterday() and exception like '%TOPSECRET%'" + +echo '7.2' + +# not perfect: when run in parallel with other tests that check can give false-negative result +# because other tests can overwrite the last_error_message, where we check the absence of sensitive data. 
+# But it's still good enough for CI - in case of regressions it will start flapping (normally it shouldn't) +$CLICKHOUSE_CLIENT \ + --server_logs_file=/dev/null \ + --query="select * from system.errors where last_error_message like '%TOPSECRET%';" + rm -f "$tmp_file" >/dev/null 2>&1 echo 8 diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql index 792bf62f9b1..cf9fd3cad12 100644 --- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql +++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS test_00961; CREATE TABLE test_00961 (d Date, a String, b UInt8, x String, y Int8, z UInt32) - ENGINE = MergeTree PARTITION BY d ORDER BY (a, b) SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0; + ENGINE = MergeTree PARTITION BY d ORDER BY (a, b) SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks=false, compress_primary_key=false; INSERT INTO test_00961 VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789); diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index 9b130f11df6..450eaee4b2f 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -1,5 +1,5 @@ =DICTIONARY in Ordinary DB -CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'[HIDDEN]\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict1 1 db_01018 dict1 @@ -12,7 +12,7 @@ db_01018 dict1 ==DROP DICTIONARY 0 =DICTIONARY in Memory DB -CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'[HIDDEN]\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict2 1 memory_db dict2 diff --git a/tests/queries/0_stateless/01047_nullable_rand.sql b/tests/queries/0_stateless/01047_nullable_rand.sql index 9d3c361c543..e5633637db6 100644 --- a/tests/queries/0_stateless/01047_nullable_rand.sql +++ b/tests/queries/0_stateless/01047_nullable_rand.sql @@ -1,13 +1,13 @@ select toTypeName(rand(cast(4 as Nullable(UInt8)))); -select toTypeName(canonicalRand(CAST(4 as Nullable(UInt8)))); +select 
toTypeName(randCanonical(CAST(4 as Nullable(UInt8)))); select toTypeName(randConstant(CAST(4 as Nullable(UInt8)))); select toTypeName(rand(Null)); -select toTypeName(canonicalRand(Null)); +select toTypeName(randCanonical(Null)); select toTypeName(randConstant(Null)); select rand(cast(4 as Nullable(UInt8))) * 0; -select canonicalRand(cast(4 as Nullable(UInt8))) * 0; +select randCanonical(cast(4 as Nullable(UInt8))) * 0; select randConstant(CAST(4 as Nullable(UInt8))) * 0; select rand(Null) * 0; -select canonicalRand(Null) * 0; +select randCanonical(Null) * 0; select randConstant(Null) * 0; diff --git a/tests/queries/0_stateless/01085_max_distributed_connections.sh b/tests/queries/0_stateless/01085_max_distributed_connections.sh index 34862289d1e..c5570fea9eb 100755 --- a/tests/queries/0_stateless/01085_max_distributed_connections.sh +++ b/tests/queries/0_stateless/01085_max_distributed_connections.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: distributed +# Tags: distributed, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,7 +12,7 @@ while [[ $i -lt $retries ]]; do opts=( --max_distributed_connections 20 --max_threads 1 - --query "SELECT sleepEachRow(1) FROM remote('127.{2..21}', system.one)" + --query "SELECT sum(sleepEachRow(1)) FROM remote('127.{2..21}', system.one)" --format Null ) # 10 less then 20 seconds (20 streams), but long enough to cover possible load peaks diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index a95029de257..983cb515d8e 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -16,7 +16,7 @@ function create_db() # So CREATE TABLE queries will fail on all replicas except one. But it's still makes sense for a stress test. 
$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 --query \ "create database if not exists ${CLICKHOUSE_DATABASE}_repl_$SUFFIX engine=Replicated('/test/01111/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '$SHARD', '$REPLICA')" \ - 2>&1| grep -Fa "Exception: " | grep -Fv "REPLICA_IS_ALREADY_EXIST" | grep -Fiv "Will not try to start it up" | \ + 2>&1| grep -Fa "Exception: " | grep -Fv "REPLICA_ALREADY_EXISTS" | grep -Fiv "Will not try to start it up" | \ grep -Fv "Coordination::Exception" | grep -Fv "already contains some data and it does not look like Replicated database path" sleep 0.$RANDOM done diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference b/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference index b9daa88b4ca..44cbbed3f57 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference +++ b/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference @@ -2,35 +2,59 @@ 1 3 bar 2 4 aa 2 5 bb -3 6 qq -3 7 ww -================== +2 6 cc +3 7 qq +3 8 ww +3 9 ee +3 10 rr +1_1_1_0 InMemory 2 +2_2_2_0 InMemory 3 +3_3_3_0 InMemory 4 +^ init ================== 2 4 aa 2 5 bb -3 6 qq -3 7 ww -================== -3 6 qq -3 7 ww -================== +2 6 cc +3 7 qq +3 8 ww +3 9 ee +3 10 rr +2_2_2_0 InMemory 3 +3_3_3_0 InMemory 4 +^ drop 1 ================== +3 7 qq +3 8 ww +3 9 ee +3 10 rr +3_3_3_0 InMemory 4 +^ detach 2 ================== 2 4 aa 2 5 bb -3 6 qq -3 7 ww -2_4_4_0 Compact -3_3_3_0 InMemory -================== +2 6 cc +3 7 qq +3 8 ww +3 9 ee +3 10 rr +2_4_4_0 Compact 3 +3_3_3_0 InMemory 4 +^ attach 2 ================= 2 4 aa 2 5 bb -3 6 qq -3 7 ww -================== +2 6 cc +3 7 qq +3 8 ww +3 9 ee +3 10 rr +2_4_4_0 Compact 3 +3_3_3_0 InMemory 4 +^ detach attach ================== 2 4 aa 2 5 bb -3 6 cc -3 7 dd -t2 2_4_4_0 Compact -t2 3_6_6_0 Compact -t3 3_1_1_0 InMemory -================== -3_1_1_0 InMemory 1 +2 6 cc +3 11 tt +3 12 yy +t2 2_4_4_0 Compact 3 +t2 3_6_6_0 Compact 2 +t3 3_1_1_0 InMemory 2 +^ replace ================== +3_1_1_0 InMemory 1 2 +^ freeze ================== diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql b/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql index aa6f281e0eb..b1ba8bc5560 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql +++ b/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql @@ -9,30 +9,34 @@ CREATE TABLE t2(id UInt32, a UInt64, s String) SYSTEM STOP MERGES t2; INSERT INTO t2 VALUES (1, 2, 'foo'), (1, 3, 'bar'); -INSERT INTO t2 VALUES (2, 4, 'aa'), (2, 5, 'bb'); -INSERT INTO t2 VALUES (3, 6, 'qq'), (3, 7, 'ww'); +INSERT INTO t2 VALUES (2, 4, 'aa'), (2, 5, 'bb'), (2, 6, 'cc'); +INSERT INTO t2 VALUES (3, 7, 'qq'), (3, 8, 'ww'), (3, 9, 'ee'), (3, 10, 'rr'); SELECT * FROM t2 ORDER BY a; -SELECT '=================='; +SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT '^ init =================='; ALTER TABLE t2 DROP PARTITION 1; SELECT * FROM t2 ORDER BY a; -SELECT '=================='; +SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT '^ drop 1 =================='; ALTER TABLE t2 DETACH PARTITION 2; SELECT * FROM t2 ORDER BY a; -SELECT '=================='; +SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT '^ detach 2 =================='; ALTER TABLE t2 ATTACH 
PARTITION 2; SELECT * FROM t2 ORDER BY a; -SELECT name, part_type FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT '=================='; +SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT '^ attach 2 ================='; DETACH TABLE t2; ATTACH TABLE t2; SELECT * FROM t2 ORDER BY a; -SELECT '=================='; +SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT '^ detach attach =================='; DROP TABLE IF EXISTS t3; @@ -40,15 +44,16 @@ CREATE TABLE t3(id UInt32, a UInt64, s String) ENGINE = MergeTree ORDER BY a PARTITION BY id SETTINGS min_rows_for_compact_part = 1000, min_rows_for_wide_part = 2000; -INSERT INTO t3 VALUES (3, 6, 'cc'), (3, 7, 'dd'); +INSERT INTO t3 VALUES (3, 11, 'tt'), (3, 12, 'yy'); ALTER TABLE t2 REPLACE PARTITION 3 FROM t3; SELECT * FROM t2 ORDER BY a; -SELECT table, name, part_type FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT table, name, part_type FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; -SELECT '=================='; +SELECT table, name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT table, name, part_type, rows FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; +SELECT '^ replace =================='; ALTER TABLE t3 FREEZE PARTITION 3; -SELECT name, part_type, is_frozen FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; +SELECT name, part_type, is_frozen, rows FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; +SELECT '^ freeze =================='; DROP TABLE t2; DROP TABLE t3; diff --git a/tests/queries/0_stateless/01167_isolation_hermitage.sh b/tests/queries/0_stateless/01167_isolation_hermitage.sh index 3f2c8308216..1d1e8006d1d 100755 --- a/tests/queries/0_stateless/01167_isolation_hermitage.sh +++ b/tests/queries/0_stateless/01167_isolation_hermitage.sh @@ -8,24 +8,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh # shellcheck source=./transactions.lib . "$CURDIR"/transactions.lib +# shellcheck source=./parts.lib +. 
"$CURDIR"/parts.lib set -e # https://github.com/ept/hermitage -$CLICKHOUSE_CLIENT -q "drop table if exists test" -$CLICKHOUSE_CLIENT -q "create table test (id int, value int) engine=MergeTree order by id" +function hard_reset_table() +{ + # Merges aren;t blocked, when they runs they left parts which are removed after old_parts_lifetime + # Test have to set old_parts_lifetime in low value in order to be able to wait deleting empty parts + $CLICKHOUSE_CLIENT -q "drop table if exists test" + $CLICKHOUSE_CLIENT -q "create table test (id int, value int) engine=MergeTree order by id SETTINGS old_parts_lifetime = 5" + $CLICKHOUSE_CLIENT -q "insert into test (id, value) values (1, 10);" + $CLICKHOUSE_CLIENT -q "insert into test (id, value) values (2, 20);" +} function reset_table() { $CLICKHOUSE_CLIENT -q "truncate table test;" $CLICKHOUSE_CLIENT -q "insert into test (id, value) values (1, 10);" $CLICKHOUSE_CLIENT -q "insert into test (id, value) values (2, 20);" + + # The is a chance that old parts are held by the oldest snapshot existed on a node + # In order not to wait too long (>60s) there is used a fallback to table recreation + wait_for_delete_empty_parts "test" $CLICKHOUSE_DATABASE 1>/dev/null 2>&1 || hard_reset_table } # TODO update test after implementing Read Committed # G0 -reset_table +hard_reset_table tx 1 "begin transaction" tx 2 "begin transaction" tx 1 "alter table test update value=11 where id=1" @@ -109,6 +122,7 @@ tx_wait 12 tx_wait 13 $CLICKHOUSE_CLIENT -q "select 16, * from test order by id" + # PMP write reset_table tx 14 "begin transaction" diff --git a/tests/queries/0_stateless/01168_mutations_isolation.reference b/tests/queries/0_stateless/01168_mutations_isolation.reference index 1b3e3f145b1..f9ebd1c5f83 100644 --- a/tests/queries/0_stateless/01168_mutations_isolation.reference +++ b/tests/queries/0_stateless/01168_mutations_isolation.reference @@ -21,18 +21,18 @@ tx7 7 20 all_1_1_0_13 tx7 7 40 all_14_14_0 tx7 7 60 all_7_7_0_13 tx7 7 80 all_12_12_0_13 -tx7 8 20 all_1_14_1_13 -tx7 8 40 all_1_14_1_13 -tx7 8 60 all_1_14_1_13 -tx7 8 80 all_1_14_1_13 +tx7 8 20 all_1_14_2_13 +tx7 8 40 all_1_14_2_13 +tx7 8 60 all_1_14_2_13 +tx7 8 80 all_1_14_2_13 Serialization error INVALID_TRANSACTION -tx11 9 21 all_1_14_1_17 -tx11 9 41 all_1_14_1_17 -tx11 9 61 all_1_14_1_17 -tx11 9 81 all_1_14_1_17 +tx11 9 21 all_1_14_2_17 +tx11 9 41 all_1_14_2_17 +tx11 9 61 all_1_14_2_17 +tx11 9 81 all_1_14_2_17 1 1 RUNNING -tx14 10 22 all_1_14_1_18 -tx14 10 42 all_1_14_1_18 -tx14 10 62 all_1_14_1_18 -tx14 10 82 all_1_14_1_18 +tx14 10 22 all_1_14_2_18 +tx14 10 42 all_1_14_2_18 +tx14 10 62 all_1_14_2_18 +tx14 10 82 all_1_14_2_18 diff --git a/tests/queries/0_stateless/01168_mutations_isolation.sh b/tests/queries/0_stateless/01168_mutations_isolation.sh index ebfdffdaeee..5d014e030f1 100755 --- a/tests/queries/0_stateless/01168_mutations_isolation.sh +++ b/tests/queries/0_stateless/01168_mutations_isolation.sh @@ -53,6 +53,9 @@ tx 6 "alter table mt update n=n*10 wh tx 6 "insert into mt values (40)" tx 6 "commit" +function accept_both_parts() { + sed 's/all_1_14_1_1/all_1_14_2_1/g' +} tx 7 "begin transaction" tx 7 "select 7, n, _part from mt order by n" @@ -61,7 +64,7 @@ tx_async 8 "alter table mt update n = 0 whe $CLICKHOUSE_CLIENT -q "kill mutation where database=currentDatabase() and mutation_id='mutation_15.txt' format Null" 2>&1| grep -Fv "probably it finished" tx_sync 8 "rollback" tx 7 "optimize table mt final" -tx 7 "select 8, n, _part from mt order by n" +tx 7 "select 8, n, _part from mt order by n" | 
accept_both_parts tx 10 "begin transaction" tx 10 "alter table mt update n = 0 where 1" | grep -Eo "Serialization error" | uniq tx 7 "alter table mt update n=n+1 where 1" @@ -71,7 +74,7 @@ tx 7 "commit" tx_async 11 "begin transaction" -tx_async 11 "select 9, n, _part from mt order by n" +tx_async 11 "select 9, n, _part from mt order by n" | accept_both_parts tx_async 12 "begin transaction" tx_async 11 "alter table mt update n=n+1 where 1" >/dev/null tx_async 12 "alter table mt update n=n+1 where 1" >/dev/null @@ -88,6 +91,6 @@ $CLICKHOUSE_CLIENT -q "kill transaction where tid=$tid_to_kill format Null" tx_sync 13 "rollback" tx 14 "begin transaction" -tx 14 "select 10, n, _part from mt order by n" +tx 14 "select 10, n, _part from mt order by n" | accept_both_parts $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table mt" diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index 32ad78dead6..3fb3730f758 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -6,8 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./transactions.lib +. "$CURDIR"/transactions.lib -set -e +set -eu $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; @@ -16,7 +18,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE dst (n UInt64, type UInt8) ENGINE=Merge function thread_insert() { - set -e + set -eu val=1 while true; do $CLICKHOUSE_CLIENT --multiquery --query " @@ -29,64 +31,184 @@ function thread_insert() done } +function is_tx_aborted_with() +{ + grep_args="" + for pattern in "${@}"; do + grep_args="$grep_args -Fe $pattern" + done + + grep $grep_args >/dev/null +} + +function is_tx_failed() +{ + grep -Fe 'DB::Exception:' > /dev/null +} + +function is_tx_ok() +{ + is_tx_failed && return 1 +} # NOTE # ALTER PARTITION query stops merges, -# but serialization error is still possible if some merge was assigned (and committed) between BEGIN and ALTER. +# but parts could be deleted (SERIALIZATION_ERROR) if some merge was assigned (and committed) between BEGIN and ALTER. 
function thread_partition_src_to_dst() { - set -e + set -eu count=0 sum=0 for i in {1..20}; do - out=$( - $CLICKHOUSE_CLIENT --multiquery --query " - BEGIN TRANSACTION; - INSERT INTO src VALUES /* ($i, 3) */ ($i, 3); - INSERT INTO dst SELECT * FROM src; - ALTER TABLE src DROP PARTITION ID 'all'; - SET throw_on_unsupported_query_inside_transaction=0; - SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=3) != ($count + 1, $sum + $i)) FORMAT Null; - COMMIT;" 2>&1) ||: + session_id="_src_to_dst_$i" + session_id_debug="_src_to_dst_debug_$i" + + tx $session_id "BEGIN TRANSACTION" + tx_id=$(tx $session_id "select transactionID().1" | awk '{print $2}') + + tx $session_id "INSERT INTO src VALUES /* ($i, 3) */ ($i, 3)" + tx $session_id "INSERT INTO dst SELECT * FROM src" + + output=$(tx $session_id "ALTER TABLE src DROP PARTITION ID 'all'" ||:) + if echo "$output" | is_tx_aborted_with "SERIALIZATION_ERROR" "PART_IS_TEMPORARILY_LOCKED" "PART_IS_TEMPORARILY_LOCKED" + then + tx $session_id "ROLLBACK" + continue + fi + + if echo "$output" | is_tx_failed + then + echo "thread_partition_src_to_dst tx_id: $tx_id session_id: $session_id" >&2 + echo "drop part has failed with unexpected status" >&2 + echo -e "output:\n $output" >&2 + return 1 + fi + + tx $session_id "SET throw_on_unsupported_query_inside_transaction=0" + + trace_output="" + output=$(tx $session_id "select transactionID()") + trace_output="$trace_output $output\n" + + tx $session_id_debug "begin transaction" + tx $session_id_debug "set transaction snapshot 3" + output=$(tx $session_id_debug "select 'src_to_dst', $i, 'src', type, n, _part from src order by type, n") + trace_output="$trace_output $output\n" + output=$(tx $session_id_debug "select 'src_to_dst', $i, 'dst', type, n, _part from dst order by type, n") + trace_output="$trace_output $output\n" + tx $session_id_debug "commit" + + output=$(tx $session_id "SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=3) != ($count + 1, $sum + $i)) FORMAT Null" ||:) + if echo "$output" | is_tx_aborted_with "FUNCTION_THROW_IF_VALUE_IS_NON_ZERO" + then + echo "thread_partition_src_to_dst tx_id: $tx_id session_id: $session_id" >&2 + echo "select throwIf has failed with FUNCTION_THROW_IF_VALUE_IS_NON_ZERO" >&2 + echo -e "trace_output:\n $trace_output" >&2 + echo -e "output:\n $output" >&2 + return 1 + fi + + if echo "$output" | is_tx_failed + then + echo "thread_partition_src_to_dst tx_id: $tx_id session_id: $session_id" >&2 + echo "select throwIf has failed with unexpected status" >&2 + echo -e "trace_output:\n $trace_output" >&2 + echo -e "output:\n $output" >&2 + return 1 + fi + + tx $session_id "COMMIT" + + count=$((count + 1)) + sum=$((sum + i)) - echo "$out" | grep -Fv "SERIALIZATION_ERROR" | grep -F "Received from " && $CLICKHOUSE_CLIENT --multiquery --query " - begin transaction; - set transaction snapshot 3; - select $i, 'src', type, n, _part from src order by type, n; - select $i, 'dst', type, n, _part from dst order by type, n; - rollback" ||: - echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || count=$((count+1)) - echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || sum=$((sum+i)) done } function thread_partition_dst_to_src() { - set -e - for i in {1..20}; do + set -eu + i=0 + while (( i <= 20 )); do + session_id="_dst_to_src_$i" + session_id_debug="_dst_to_src_debug_$i" + + tx $session_id "SYSTEM STOP MERGES dst" + tx $session_id "ALTER TABLE dst DROP PARTITION ID 'nonexistent';" + tx $session_id "SYSTEM
SYNC TRANSACTION LOG" + + tx $session_id "BEGIN TRANSACTION" + tx_id=$(tx $session_id "select transactionID().1" | awk '{print $2}') + + tx $session_id "INSERT INTO dst VALUES /* ($i, 4) */ ($i, 4)" + tx $session_id "INSERT INTO src SELECT * FROM dst" + + output=$(tx $session_id "ALTER TABLE dst DROP PARTITION ID 'all'" ||:) + if echo "$output" | is_tx_aborted_with "PART_IS_TEMPORARILY_LOCKED" + then + # this is legit case, just retry + tx $session_id "ROLLBACK" + continue + fi + + if echo "$output" | is_tx_failed + then + echo "thread_partition_dst_to_src tx_id: $tx_id session_id: $session_id" >&2 + echo "drop part has failed with unexpected status" >&2 + echo "output $output" >&2 + return 1 + fi + + tx $session_id "SET throw_on_unsupported_query_inside_transaction=0" + tx $session_id "SYSTEM START MERGES dst" + + trace_output="" + output=$(tx $session_id "select transactionID()") + trace_output="$trace_output $output" + + tx $session_id_debug "begin transaction" + tx $session_id_debug "set transaction snapshot 3" + output=$(tx $session_id_debug "select 'dst_to_src', $i, 'src', type, n, _part from src order by type, n") + trace_output="$trace_output $output" + output=$(tx $session_id_debug "select 'dst_to_src', $i, 'dst', type, n, _part from dst order by type, n") + trace_output="$trace_output $output" + tx $session_id_debug "commit" + + output=$(tx $session_id "SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=4) != (toUInt8($i/2 + 1), (select sum(number) from numbers(1, $i) where number % 2 or number=$i))) FORMAT Null" ||:) + if echo "$output" | is_tx_aborted_with "FUNCTION_THROW_IF_VALUE_IS_NON_ZERO" + then + echo "thread_partition_dst_to_src tx_id: $tx_id session_id: $session_id" >&2 + echo "select throwIf has failed with FUNCTION_THROW_IF_VALUE_IS_NON_ZERO" >&2 + echo -e "trace_output:\n $trace_output" >&2 + echo -e "output:\n $output" >&2 + return 1 + fi + + if echo "$output" | is_tx_failed + then + echo "thread_partition_dst_to_src tx_id: $tx_id session_id: $session_id" >&2 + echo "SELECT throwIf has failed with unexpected status" >&2 + echo -e "trace_output:\n $trace_output" >&2 + echo -e "output:\n $output" >&2 + return 1 + fi + action="ROLLBACK" if (( i % 2 )); then action="COMMIT" fi - $CLICKHOUSE_CLIENT --multiquery --query " - SYSTEM STOP MERGES dst; - ALTER TABLE dst DROP PARTITION ID 'nonexistent'; -- STOP MERGES doesn't wait for started merges to finish, so we use this trick - SYSTEM SYNC TRANSACTION LOG; - BEGIN TRANSACTION; - INSERT INTO dst VALUES /* ($i, 4) */ ($i, 4); - INSERT INTO src SELECT * FROM dst; - ALTER TABLE dst DROP PARTITION ID 'all'; - SET throw_on_unsupported_query_inside_transaction=0; - SYSTEM START MERGES dst; - SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=4) != (toUInt8($i/2 + 1), (select sum(number) from numbers(1, $i) where number % 2 or number=$i))) FORMAT Null; - $action;" + + tx $session_id "$action" + + i=$((i + 1)) done } function thread_select() { - set -e + set -eu while true; do + output=$( $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; -- no duplicates @@ -94,10 +216,14 @@ function thread_select() SELECT type, throwIf(count(n) != countDistinct(n)) FROM dst GROUP BY type FORMAT Null; -- rows inserted by thread_insert moved together SET throw_on_unsupported_query_inside_transaction=0; + SELECT _table, throwIf(arraySort(groupArrayIf(n, type=1)) != arraySort(groupArrayIf(n, type=2))) FROM merge(currentDatabase(), '') GROUP BY _table FORMAT Null; + -- all 
rows are inserted in insert_thread SELECT type, throwIf(count(n) != max(n)), throwIf(sum(n) != max(n)*(max(n)+1)/2) FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type FORMAT Null; - COMMIT;" + COMMIT;" 2>&1 ||:) + + echo "$output" | grep -F "Received from " > /dev/null && echo "$output">&2 && return 1 done } @@ -106,11 +232,13 @@ thread_select & PID_2=$! thread_partition_src_to_dst & PID_3=$! thread_partition_dst_to_src & PID_4=$! -wait $PID_3 && wait $PID_4 +wait $PID_3 +wait $PID_4 kill -TERM $PID_1 kill -TERM $PID_2 -wait +wait ||: + wait_for_queries_to_finish $CLICKHOUSE_CLIENT -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" @@ -118,6 +246,5 @@ $CLICKHOUSE_CLIENT -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arra $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" $CLICKHOUSE_CLIENT -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" - $CLICKHOUSE_CLIENT --query "DROP TABLE src"; $CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index 3a167e76817..3099fae4a42 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -28,9 +28,13 @@ 4 1 Commit 1 1 1 0 5 1 Begin 1 1 1 1 5 1 AddPart 1 1 1 1 all_5_5_0 +5 1 AddPart 1 1 1 1 all_1_1_1 5 1 LockPart 1 1 1 1 all_1_1_0 +5 1 AddPart 1 1 1 1 all_3_3_1 5 1 LockPart 1 1 1 1 all_3_3_0 +5 1 AddPart 1 1 1 1 all_4_4_1 5 1 LockPart 1 1 1 1 all_4_4_0 +5 1 AddPart 1 1 1 1 all_5_5_1 5 1 LockPart 1 1 1 1 all_5_5_0 5 1 UnlockPart 1 1 1 1 all_1_1_0 5 1 UnlockPart 1 1 1 1 all_3_3_0 diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh index c18514d0ecc..d2695e602c5 100755 --- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh @@ -22,7 +22,7 @@ function run_until_out_contains() PATTERN=$1 shift - for ((i=MIN_TIMEOUT; i<10; i++)) + for ((i=MIN_TIMEOUT; i<33; i=i*2)) do "$@" --distributed_ddl_task_timeout="$i" > "$TMP_OUT" 2>&1 if grep -q "$PATTERN" "$TMP_OUT" @@ -37,7 +37,7 @@ RAND_COMMENT="01175_DDL_$RANDOM" LOG_COMMENT="${CLICKHOUSE_LOG_COMMENT}_$RAND_COMMENT" CLICKHOUSE_CLIENT_WITH_SETTINGS=${CLICKHOUSE_CLIENT/--log_comment ${CLICKHOUSE_LOG_COMMENT}/--log_comment ${LOG_COMMENT}} -CLICKHOUSE_CLIENT_WITH_SETTINGS+=" --output_format_parallel_formatting=0 " +CLICKHOUSE_CLIENT_WITH_SETTINGS+=" --output_format_parallel_formatting=0 --database_atomic_wait_for_drop_and_detach_synchronously=0 " CLIENT=${CLICKHOUSE_CLIENT_WITH_SETTINGS} CLIENT+=" --distributed_ddl_task_timeout=$TIMEOUT " diff --git a/tests/queries/0_stateless/01190_full_attach_syntax.reference b/tests/queries/0_stateless/01190_full_attach_syntax.reference index f924c2ec780..61a0603975a 100644 --- a/tests/queries/0_stateless/01190_full_attach_syntax.reference +++ b/tests/queries/0_stateless/01190_full_attach_syntax.reference @@ -1,5 +1,5 @@ -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB 
\'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'[HIDDEN]\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'[HIDDEN]\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log test diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference index d80501b3f4d..e5b8ffd7732 100644 --- a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference @@ -6,7 +6,7 @@ CREATE DICTIONARY dict_db_01224.dict `val` UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '[HIDDEN]' DB 'dict_db_01224')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) NOT_LOADED @@ -17,7 +17,7 @@ CREATE TABLE dict_db_01224_dictionary.`dict_db_01224.dict` ) ENGINE = Dictionary(`dict_db_01224.dict`) NOT_LOADED -Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'dict_data\' PASSWORD \'\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) +Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'dict_data\' PASSWORD \'[HIDDEN]\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) NOT_LOADED key UInt64 val UInt64 diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 59c7d978493..dd9fa7abc1b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -85,8 +85,8 @@ SHOW ROLES ['SHOW CREATE ROLE'] GLOBAL SHOW ACCESS SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY'] TABLE SHOW ACCESS SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS -SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT +SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/01451_detach_drop_part.reference 
b/tests/queries/0_stateless/01451_detach_drop_part.reference index bc4f1b6be80..a34c308cb72 100644 --- a/tests/queries/0_stateless/01451_detach_drop_part.reference +++ b/tests/queries/0_stateless/01451_detach_drop_part.reference @@ -10,6 +10,8 @@ all_2_2_0 -- drop part -- 0 2 +all_1_1_0 +all_3_3_0 -- resume merges -- 0 2 diff --git a/tests/queries/0_stateless/01451_detach_drop_part.sql b/tests/queries/0_stateless/01451_detach_drop_part.sql index a285730e45f..4c6cf54a6d9 100644 --- a/tests/queries/0_stateless/01451_detach_drop_part.sql +++ b/tests/queries/0_stateless/01451_detach_drop_part.sql @@ -31,6 +31,8 @@ ALTER TABLE mt_01451 ATTACH PART 'all_4_4_0'; -- { serverError 233 } SELECT v FROM mt_01451 ORDER BY v; +SELECT name FROM system.parts WHERE table = 'mt_01451' AND active AND database = currentDatabase(); + SELECT '-- resume merges --'; SYSTEM START MERGES mt_01451; OPTIMIZE TABLE mt_01451 FINAL; diff --git a/tests/queries/0_stateless/01516_drop_table_stress_long.sh b/tests/queries/0_stateless/01516_drop_table_stress_long.sh index d2149d7122c..3932d0ebbc9 100755 --- a/tests/queries/0_stateless/01516_drop_table_stress_long.sh +++ b/tests/queries/0_stateless/01516_drop_table_stress_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -11,26 +11,26 @@ function drop_database() { # redirect stderr since it is racy with DROP TABLE # and tries to remove ${CURR_DATABASE}.data too. - ${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS ${CURR_DATABASE}" 2>/dev/null + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP DATABASE IF EXISTS ${CURR_DATABASE}" 2>/dev/null } trap drop_database EXIT function drop_table() { - ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data3;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data1;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data2;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data3;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data1;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data2;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" } function create() { - ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE IF NOT EXISTS ${CURR_DATABASE};" - ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data1 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data2 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data3 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE DATABASE IF NOT EXISTS ${CURR_DATABASE};" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE TABLE IF 
NOT EXISTS ${CURR_DATABASE}.data1 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data2 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data3 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" } -for _ in {1..50}; do +for _ in {1..25}; do create drop_table & drop_database & diff --git a/tests/queries/0_stateless/01660_system_parts_smoke.reference b/tests/queries/0_stateless/01660_system_parts_smoke.reference index 36550f31bd0..b38d699c2b9 100644 --- a/tests/queries/0_stateless/01660_system_parts_smoke.reference +++ b/tests/queries/0_stateless/01660_system_parts_smoke.reference @@ -9,6 +9,6 @@ all_2_2_0 1 1 Active 2 Outdated # truncate -Outdated -Outdated +HAVE PARTS Active +HAVE PARTS Outdated # drop diff --git a/tests/queries/0_stateless/01660_system_parts_smoke.sql b/tests/queries/0_stateless/01660_system_parts_smoke.sql index cc925680425..64cba86b8f6 100644 --- a/tests/queries/0_stateless/01660_system_parts_smoke.sql +++ b/tests/queries/0_stateless/01660_system_parts_smoke.sql @@ -31,9 +31,11 @@ OPTIMIZE TABLE data_01660 FINAL; SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state ORDER BY _state; -- TRUNCATE does not remove parts instantly +-- Empty active parts are cleared by an async process +-- Inactive parts are also cleared by an async process SELECT '# truncate'; TRUNCATE data_01660; -SELECT _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; +SELECT if (count() > 0, 'HAVE PARTS', 'NO PARTS'), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state; -- But DROP does SELECT '# drop'; diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sh b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sh new file mode 100755 index 00000000000..db53dbbce85 --- /dev/null +++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +set -euo pipefail + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q 'DROP TABLE IF EXISTS table_with_single_pk' + +${CLICKHOUSE_CLIENT} -q ' + CREATE TABLE table_with_single_pk + ( + key UInt8, + value String + ) + ENGINE = MergeTree + ORDER BY key +' + +${CLICKHOUSE_CLIENT} -q 'INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(1000000)' + +# Check NewPart +${CLICKHOUSE_CLIENT} -q 'SYSTEM FLUSH LOGS' +${CLICKHOUSE_CLIENT} -q " + WITH ( + SELECT (event_time, event_time_microseconds) + FROM system.part_log + WHERE table = 'table_with_single_pk' AND database = currentDatabase() AND event_type = 'NewPart' + ORDER BY event_time DESC + LIMIT 1 + ) AS time + SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail')" + +# Now let's check RemovePart +${CLICKHOUSE_CLIENT} -q 'TRUNCATE TABLE table_with_single_pk' + +# Wait until parts are removed +function get_inactive_parts_count() { + table_name=$1 + ${CLICKHOUSE_CLIENT} -q " + SELECT + count() + FROM + system.parts + WHERE + table = 'table_with_single_pk' + AND + active = 0 + AND + database = '${CLICKHOUSE_DATABASE}' + " +} + +function wait_table_inactive_parts_are_gone() { + table_name=$1 + + while true + do + count=$(get_inactive_parts_count $table_name) + if [[ count -gt 0 ]] + then + sleep 1 + else + break + fi + done +} + +export -f get_inactive_parts_count +export -f wait_table_inactive_parts_are_gone +timeout 60 bash -c 'wait_table_inactive_parts_are_gone table_with_single_pk' + +${CLICKHOUSE_CLIENT} -q 'SYSTEM FLUSH LOGS;' +${CLICKHOUSE_CLIENT} -q " + WITH ( + SELECT (event_time, event_time_microseconds) + FROM system.part_log + WHERE table = 'table_with_single_pk' AND database = currentDatabase() AND event_type = 'RemovePart' + ORDER BY event_time DESC + LIMIT 1 + ) AS time + SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail')" + +${CLICKHOUSE_CLIENT} -q 'DROP TABLE table_with_single_pk' + + diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql deleted file mode 100644 index 6063be4d1da..00000000000 --- a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql +++ /dev/null @@ -1,36 +0,0 @@ -DROP TABLE IF EXISTS table_with_single_pk; - -CREATE TABLE table_with_single_pk -( - key UInt8, - value String -) -ENGINE = MergeTree -ORDER BY key; - -INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(1000000); - --- Check NewPart -SYSTEM FLUSH LOGS; -WITH ( - SELECT (event_time, event_time_microseconds) - FROM system.part_log - WHERE table = 'table_with_single_pk' AND database = currentDatabase() AND event_type = 'NewPart' - ORDER BY event_time DESC - LIMIT 1 - ) AS time -SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail'); - --- Now let's check RemovePart -TRUNCATE TABLE table_with_single_pk; -SYSTEM FLUSH LOGS; -WITH ( - SELECT (event_time, event_time_microseconds) - FROM system.part_log - WHERE table = 'table_with_single_pk' AND database = currentDatabase() AND event_type = 'RemovePart' - ORDER BY event_time DESC - LIMIT 1 - ) AS time -SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail'); - -DROP TABLE table_with_single_pk; diff --git a/tests/queries/0_stateless/01710_projection_detach_part.sql b/tests/queries/0_stateless/01710_projection_detach_part.sql index e3e6c7ac165..d28c0848d42 100644 --- 
a/tests/queries/0_stateless/01710_projection_detach_part.sql +++ b/tests/queries/0_stateless/01710_projection_detach_part.sql @@ -10,6 +10,6 @@ alter table t detach partition 1; alter table t attach partition 1; -select count() from system.projection_parts where database = currentDatabase() and table = 't'; +select count() from system.projection_parts where database = currentDatabase() and table = 't' and active; drop table t; diff --git a/tests/queries/0_stateless/01825_type_json_1.reference b/tests/queries/0_stateless/01825_type_json_1.reference index 857c624fb9b..3f0eaf3854a 100644 --- a/tests/queries/0_stateless/01825_type_json_1.reference +++ b/tests/queries/0_stateless/01825_type_json_1.reference @@ -6,22 +6,26 @@ all_2_2_0 data Tuple(k5 String) all_1_2_1 data Tuple(k1 String, k2 Tuple(k3 String, k4 String), k5 String) ============ 1 ['aaa','ddd'] [['bbb','ccc'],['eee','fff']] +all_1_2_2 data Tuple(_dummy UInt8) all_3_3_0 data Tuple(k1 Nested(k2 String, k3 Nested(k4 String))) ============ 1 a 42 2 b 4200 4242 +all_1_2_3 data Tuple(_dummy UInt8) all_4_4_0 data Tuple(name String, value Int16) 1 a 42 2 b 4200 3 a 42.123 +all_1_2_3 data Tuple(_dummy UInt8) all_4_4_0 data Tuple(name String, value Int16) all_5_5_0 data Tuple(name String, value Float64) 1 a 42 2 b 4200 3 a 42.123 4 a some +all_1_2_3 data Tuple(_dummy UInt8) all_4_4_0 data Tuple(name String, value Int16) all_5_5_0 data Tuple(name String, value Float64) all_6_6_0 data Tuple(name String, value String) -all_4_6_1 data Tuple(name String, value String) +all_1_6_4 data Tuple(name String, value String) diff --git a/tests/queries/0_stateless/01825_type_json_17.sql b/tests/queries/0_stateless/01825_type_json_17.sql index e3c0c83322b..ee5cf590407 100644 --- a/tests/queries/0_stateless/01825_type_json_17.sql +++ b/tests/queries/0_stateless/01825_type_json_17.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel DROP TABLE IF EXISTS t_json_17; SET allow_experimental_object_type = 1; diff --git a/tests/queries/0_stateless/01825_type_json_18.reference b/tests/queries/0_stateless/01825_type_json_18.reference new file mode 100644 index 00000000000..d93f9bda63c --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_18.reference @@ -0,0 +1,2 @@ +1 (1) Tuple(k1 Int8) +1 ([1,2]) Tuple(k1 Array(Int8)) diff --git a/tests/queries/0_stateless/01825_type_json_18.sql b/tests/queries/0_stateless/01825_type_json_18.sql new file mode 100644 index 00000000000..b493982a12c --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_18.sql @@ -0,0 +1,16 @@ +-- Tags: no-fasttest + +SET allow_experimental_object_type = 1; + +DROP TABLE IF EXISTS t_json_2; + +CREATE TABLE t_json_2(id UInt64, data Object('JSON')) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_json_2 FORMAT JSONEachRow {"id": 1, "data" : {"k1": 1}}; +SELECT id, data, toTypeName(data) FROM t_json_2 ORDER BY id; + +TRUNCATE TABLE t_json_2; + +INSERT INTO t_json_2 FORMAT JSONEachRow {"id": 1, "data" : {"k1": [1, 2]}}; +SELECT id, data, toTypeName(data) FROM t_json_2 ORDER BY id; diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 86468b4fcd6..92c7a27bc9a 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -21,5 +21,5 @@ SYSTEM FLUSH LOGS; SELECT read_rows < 110000 FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() -AND event_time > now() - INTERVAL 10 SECOND 
+AND event_date >= yesterday() AND lower(query) LIKE lower('SELECT s FROM order_by_desc ORDER BY u%'); diff --git a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql index e77f88aa36f..1efb9cff6a4 100644 --- a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql +++ b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql @@ -7,7 +7,7 @@ CREATE TABLE test_table INDEX value_index value TYPE minmax GRANULARITY 1 ) Engine=MergeTree() -ORDER BY key; +ORDER BY key SETTINGS compress_marks=false; INSERT INTO test_table VALUES (0, 'Value'); SELECT * FROM system.data_skipping_indices WHERE database = currentDatabase(); diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 7a0c383b3fb..c206a41a03e 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -14,9 +14,7 @@ CREATE TABLE system.asynchronous_inserts `first_update` DateTime64(6), `total_bytes` UInt64, `entries.query_id` Array(String), - `entries.bytes` Array(UInt64), - `entries.finished` Array(UInt8), - `entries.exception` Array(String) + `entries.bytes` Array(UInt64) ) ENGINE = SystemAsynchronousInserts COMMENT 'SYSTEM TABLE is built on the fly.' @@ -190,6 +188,10 @@ CREATE TABLE system.disks `keep_free_space` UInt64, `type` String, `is_encrypted` UInt8, + `is_read_only` UInt8, + `is_write_once` UInt8, + `is_remote` UInt8, + `is_broken` UInt8, `cache_path` String ) ENGINE = SystemDisks @@ -282,7 +284,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW 
POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW NAMED COLLECTIONS' = 87, 'SHOW ACCESS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN 
SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -367,6 +369,19 @@ CREATE TABLE system.metrics ) ENGINE = SystemMetrics COMMENT 'SYSTEM TABLE is built on the fly.' +CREATE TABLE system.moves +( + `database` String, + `table` String, + `elapsed` Float64, + `target_disk_name` String, + `target_disk_path` String, + `part_name` String, + `part_size` UInt64, + `thread_id` UInt64 +) +ENGINE = SystemMoves +COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.mutations ( `database` String, @@ -545,10 +560,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
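# Not part of the upstream patch: a minimal sketch of how the new system.moves table shown
# above could be inspected from a stateless test while an ALTER TABLE ... MOVE PART/PARTITION
# is in flight. It assumes the usual shell_config.sh helpers ($CLICKHOUSE_CLIENT) and uses only
# columns listed in the reference; the 5-second threshold is an arbitrary illustration.
${CLICKHOUSE_CLIENT} -q "
    SELECT database, table, part_name, formatReadableSize(part_size) AS size,
           target_disk_name, round(elapsed, 1) AS elapsed_seconds
    FROM system.moves
    WHERE elapsed > 5
    ORDER BY elapsed DESC"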
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW NAMED COLLECTIONS' = 87, 'SHOW ACCESS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 
'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 
'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW NAMED COLLECTIONS' = 87, 'SHOW ACCESS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 
118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 
'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' diff --git a/tests/queries/0_stateless/02117_show_create_table_system.sql b/tests/queries/0_stateless/02117_show_create_table_system.sql index 8b75ed60eec..37bf2667069 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.sql +++ b/tests/queries/0_stateless/02117_show_create_table_system.sql @@ -45,6 +45,7 @@ show create table macros format TSVRaw; show create table merge_tree_settings format TSVRaw; show create table merges format TSVRaw; show create table metrics format TSVRaw; +show create table moves format TSVRaw; show create table mutations format TSVRaw; show create table numbers format TSVRaw; show create table numbers_mt format TSVRaw; diff --git a/tests/queries/0_stateless/02124_buffer_insert_select_race.reference b/tests/queries/0_stateless/02124_buffer_insert_select_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02124_buffer_insert_select_race.sh b/tests/queries/0_stateless/02124_buffer_insert_select_race.sh new file mode 100755 index 00000000000..22965a274c0 --- /dev/null +++ b/tests/queries/0_stateless/02124_buffer_insert_select_race.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +# Regression test for 'Logical error: No column to rollback' in case of +# exception while committing batch into the Buffer, see [1]. +# +# [1]: https://github.com/ClickHouse/ClickHouse/issues/42740 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_buffer_string" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_buffer_string(key String) ENGINE = Buffer('', '', 1, 1, 1, 1000000000000, 1000000000000, 1000000000000, 1000000000000)" + +# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors +# --concurrency -- we need to have SELECT and INSERT in parallel to have refcount +# of the column in the Buffer block > 1, that way we will do +# a full clone and moving a column may throw. 
+# +# It reproduces the problem 100% with MemoryTrackerFaultInjectorInThread in the appendBlock() +$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null < /dev/null > /dev/null - done -} - -TIMEOUT=10 - -export -f insert1 -export -f select1 - -timeout $TIMEOUT bash -c insert1 & -timeout $TIMEOUT bash -c select1 & - -wait +# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors +$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null < x LIKE '%async_inserts_2156', tables), \ - query_kind, Settings['async_insert'], Settings['wait_for_async_insert'] FROM system.query_log \ + query_kind, Settings['async_insert'] FROM system.query_log \ WHERE event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' \ AND query ILIKE 'INSERT INTO async_inserts_2156 VALUES%' AND type = 'QueryFinish' \ ORDER BY query_start_time_microseconds" diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats.reference b/tests/queries/0_stateless/02187_async_inserts_all_formats.reference index dd2549d25b0..2de728b4cb4 100644 --- a/tests/queries/0_stateless/02187_async_inserts_all_formats.reference +++ b/tests/queries/0_stateless/02187_async_inserts_all_formats.reference @@ -1,6 +1,7 @@ Arrow ArrowStream Avro +BSONEachRow CSV CSVWithNames CSVWithNamesAndTypes diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference index de9ac10f641..997105c9da3 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference @@ -3,7 +3,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index d3b3d3d7f4c..f6671b82291 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -6,7 +6,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference 
b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index 329ca122af1..48d91c6f142 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -5,7 +5,7 @@ SET enable_filesystem_cache_on_write_operations=0; SET skip_download_if_exceeds_query_cache=1; SET max_query_cache_size=128; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index 2a4f4ae219c..7dd975b27ee 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -8,7 +8,7 @@ SET skip_download_if_exceeds_query_cache=1; SET max_query_cache_size=128; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index c67eecf8cf2..6b96da0be59 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -4,7 +4,7 @@ Using storage policy: s3_cache SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; @@ -19,7 +19,7 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT 
Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; @@ -39,7 +39,7 @@ Using storage policy: local_cache SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; @@ -54,7 +54,7 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache_3', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 5a1295db495..9405b9eb614 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -3,7 +3,7 @@ Using storage policy: s3_cache SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size, state @@ -129,7 +129,7 @@ Using storage policy: local_cache SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size, state diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference index 4a10ff02586..91587dc8e79 100644 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference +++ 
b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference @@ -6,7 +6,7 @@ SET enable_filesystem_cache_log=1; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100000); SELECT 2240, 's3_cache', * FROM test FORMAT Null; @@ -27,7 +27,7 @@ SET enable_filesystem_cache_log=1; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100000); SELECT 2240, 'local_cache', * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02293_part_log_has_merge_reason.sh b/tests/queries/0_stateless/02293_part_log_has_merge_reason.sh index 1a33e6db459..23c073d2f83 100755 --- a/tests/queries/0_stateless/02293_part_log_has_merge_reason.sh +++ b/tests/queries/0_stateless/02293_part_log_has_merge_reason.sh @@ -17,7 +17,7 @@ ${CLICKHOUSE_CLIENT} -q ' ENGINE = MergeTree() ORDER BY tuple() TTL event_time + INTERVAL 3 MONTH - SETTINGS min_bytes_for_wide_part = 0, materialize_ttl_recalculate_only = true, max_number_of_merges_with_ttl_in_pool = 100 + SETTINGS old_parts_lifetime = 1, min_bytes_for_wide_part = 0, materialize_ttl_recalculate_only = true, max_number_of_merges_with_ttl_in_pool = 100 ' ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_part_log_has_merge_type_table VALUES (now(), 1, 'username1');" @@ -57,7 +57,7 @@ function wait_table_parts_are_merged_into_one_part() { export -f get_parts_count export -f wait_table_parts_are_merged_into_one_part -timeout 30 bash -c 'wait_table_parts_are_merged_into_one_part t_part_log_has_merge_type_table' +timeout 60 bash -c 'wait_table_parts_are_merged_into_one_part t_part_log_has_merge_type_table' ${CLICKHOUSE_CLIENT} -q 'SYSTEM FLUSH LOGS' diff --git a/tests/queries/0_stateless/02345_filesystem_local.sh b/tests/queries/0_stateless/02345_filesystem_local.sh index 6771df2ae2d..aac66f9f7b9 100755 --- a/tests/queries/0_stateless/02345_filesystem_local.sh +++ b/tests/queries/0_stateless/02345_filesystem_local.sh @@ -5,4 +5,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # Checks that these functions are working inside clickhouse-local. Does not check specific values. 
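# Not part of the upstream patch: the same sanity check as the updated query below, sketched
# against a running server via $CLICKHOUSE_CLIENT instead of clickhouse-local (assuming the
# usual shell_config.sh helpers). It relies only on the invariant the test itself uses: the
# renamed filesystemUnreserved() can never report more space than filesystemCapacity().
${CLICKHOUSE_CLIENT} --query "SELECT filesystemAvailable() > 0, filesystemUnreserved() <= filesystemCapacity()"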
-$CLICKHOUSE_LOCAL --query "SELECT filesystemAvailable() > 0, filesystemFree() <= filesystemCapacity()" +$CLICKHOUSE_LOCAL --query "SELECT filesystemAvailable() > 0, filesystemUnreserved() <= filesystemCapacity()" diff --git a/tests/queries/0_stateless/02354_annoy.reference b/tests/queries/0_stateless/02354_annoy.reference index 2cc62ef4c86..38678fb67c9 100644 --- a/tests/queries/0_stateless/02354_annoy.reference +++ b/tests/queries/0_stateless/02354_annoy.reference @@ -14,3 +14,13 @@ 1 [0,0,10] 5 [0,0,10.2] 4 [0,0,9.7] + Name: annoy_index + Name: annoy_index +1 [0,0,10] +2 [0.2,0,10] +3 [-0.3,0,10] +1 [0,0,10] +2 [0.2,0,10] +3 [-0.3,0,10] + Name: annoy_index + Name: annoy_index diff --git a/tests/queries/0_stateless/02354_annoy.sh b/tests/queries/0_stateless/02354_annoy.sh new file mode 100755 index 00000000000..526886ec68d --- /dev/null +++ b/tests/queries/0_stateless/02354_annoy.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-backward-compatibility-check + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Check that index works correctly for L2Distance and with client parameters +$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q " +DROP TABLE IF EXISTS 02354_annoy_l2; + +CREATE TABLE 02354_annoy_l2 +( + id Int32, + embedding Array(Float32), + INDEX annoy_index embedding TYPE annoy() GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; + +INSERT INTO 02354_annoy_l2 VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +SELECT * +FROM 02354_annoy_l2 +WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0 +LIMIT 5; + +SELECT * +FROM 02354_annoy_l2 +ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0]) +LIMIT 3; + +SET param_02354_target_vector='[0.0, 0.0, 10.0]'; + +SELECT * +FROM 02354_annoy_l2 +WHERE L2Distance(embedding, {02354_target_vector: Array(Float32)}) < 1.0 +LIMIT 5; + +SELECT * +FROM 02354_annoy_l2 +ORDER BY L2Distance(embedding, {02354_target_vector: Array(Float32)}) +LIMIT 3; + +SELECT * +FROM 02354_annoy_l2 +ORDER BY L2Distance(embedding, [0.0, 0.0]) +LIMIT 3; -- { serverError 80 } + + +DROP TABLE IF EXISTS 02354_annoy_l2; +" + +# Check that indexes are used +$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q " +DROP TABLE IF EXISTS 02354_annoy_l2; + +CREATE TABLE 02354_annoy_l2 +( + id Int32, + embedding Array(Float32), + INDEX annoy_index embedding TYPE annoy() GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; + +INSERT INTO 02354_annoy_l2 VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +EXPLAIN indexes=1 +SELECT * +FROM 02354_annoy_l2 +WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0 +LIMIT 5; + +EXPLAIN indexes=1 +SELECT * +FROM 02354_annoy_l2 +ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0]) +LIMIT 3; +DROP TABLE IF EXISTS 02354_annoy_l2; +" | grep "annoy_index" + + +# # Check 
that index works correctly for cosineDistance +$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q " +DROP TABLE IF EXISTS 02354_annoy_cosine; + +CREATE TABLE 02354_annoy_cosine +( + id Int32, + embedding Array(Float32), + INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; + +INSERT INTO 02354_annoy_cosine VALUES (1, [0.0, 0.0, 10.0]), (2, [0.2, 0.0, 10.0]), (3, [-0.3, 0.0, 10.0]), (4, [0.5, 0.0, 10.1]), (5, [0.8, 0.0, 10.0]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +SELECT * +FROM 02354_annoy_cosine +WHERE cosineDistance(embedding, [0.0, 0.0, 10.0]) < 1.0 +LIMIT 3; + +SELECT * +FROM 02354_annoy_cosine +ORDER BY cosineDistance(embedding, [0.0, 0.0, 10.0]) +LIMIT 3; + +DROP TABLE IF EXISTS 02354_annoy_cosine; +" + +# # Check that indexes are used +$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q " +DROP TABLE IF EXISTS 02354_annoy_cosine; + +CREATE TABLE 02354_annoy_cosine +( + id Int32, + embedding Array(Float32), + INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; + +INSERT INTO 02354_annoy_cosine VALUES (1, [0.0, 0.0, 10.0]), (2, [0.2, 0.0, 10.0]), (3, [-0.3, 0.0, 10.0]), (4, [0.5, 0.0, 10.1]), (5, [0.8, 0.0, 10.0]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +EXPLAIN indexes=1 +SELECT * +FROM 02354_annoy_cosine +WHERE cosineDistance(embedding, [0.0, 0.0, 10.0]) < 1.0 +LIMIT 3; + +EXPLAIN indexes=1 +SELECT * +FROM 02354_annoy_cosine +ORDER BY cosineDistance(embedding, [0.0, 0.0, 10.0]) +LIMIT 3; +DROP TABLE IF EXISTS 02354_annoy_cosine; +" | grep "annoy_index" + +# # Check that weird base columns are rejected +$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q " +DROP TABLE IF EXISTS 02354_annoy; + +-- Index spans >1 column + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Float32), + INDEX annoy_index (embedding, id) TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 7 } + +-- Index must be created on Array(Float32) or Tuple(Float32) + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Float32, + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Float64), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Tuple(Float32, Float64), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(LowCardinality(Float32)), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Nullable(Float32)), + INDEX annoy_index embedding TYPE 
annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 }" diff --git a/tests/queries/0_stateless/02354_annoy.sql b/tests/queries/0_stateless/02354_annoy.sql deleted file mode 100644 index 654a4b545ea..00000000000 --- a/tests/queries/0_stateless/02354_annoy.sql +++ /dev/null @@ -1,114 +0,0 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-backward-compatibility-check - -SET allow_experimental_annoy_index = 1; - -DROP TABLE IF EXISTS 02354_annoy; - -CREATE TABLE 02354_annoy -( - id Int32, - embedding Array(Float32), - INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY id -SETTINGS index_granularity=5; - -INSERT INTO 02354_annoy VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); - -SELECT * -FROM 02354_annoy -WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0 -LIMIT 5; - -SELECT * -FROM 02354_annoy -ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0]) -LIMIT 3; - -SET param_02354_target_vector='[0.0, 0.0, 10.0]'; - -SELECT * -FROM 02354_annoy -WHERE L2Distance(embedding, {02354_target_vector: Array(Float32)}) < 1.0 -LIMIT 5; - -SELECT * -FROM 02354_annoy -ORDER BY L2Distance(embedding, {02354_target_vector: Array(Float32)}) -LIMIT 3; - -SELECT * -FROM 02354_annoy -ORDER BY L2Distance(embedding, [0.0, 0.0]) -LIMIT 3; -- { serverError 80 } - -DROP TABLE IF EXISTS 02354_annoy; - --- ------------------------------------ --- Check that weird base columns are rejected - --- Index spans >1 column - -CREATE TABLE 02354_annoy -( - id Int32, - embedding Array(Float32), - INDEX annoy_index (embedding, id) TYPE annoy(100) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY id -SETTINGS index_granularity=5; -- {serverError 7 } - --- Index must be created on Array(Float32) or Tuple(Float32) - -CREATE TABLE 02354_annoy -( - id Int32, - embedding Float32, - INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY id -SETTINGS index_granularity=5; -- {serverError 44 } - - -CREATE TABLE 02354_annoy -( - id Int32, - embedding Array(Float64), - INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY id -SETTINGS index_granularity=5; -- {serverError 44 } - -CREATE TABLE 02354_annoy -( - id Int32, - embedding Tuple(Float32, Float64), - INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY id -SETTINGS index_granularity=5; -- {serverError 44 } - -CREATE TABLE 02354_annoy -( - id Int32, - embedding Array(LowCardinality(Float32)), - INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY id -SETTINGS index_granularity=5; -- {serverError 44 } - -CREATE TABLE 02354_annoy -( - id Int32, - embedding Array(Nullable(Float32)), - INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY id -SETTINGS index_granularity=5; -- {serverError 44 } diff --git a/tests/queries/0_stateless/02366_explain_query_tree.sql b/tests/queries/0_stateless/02366_explain_query_tree.sql index c942f0cac37..c38b2d819d1 100644 --- a/tests/queries/0_stateless/02366_explain_query_tree.sql +++ b/tests/queries/0_stateless/02366_explain_query_tree.sql @@ -1,6 +1,6 @@ SET allow_experimental_analyzer 
= 1; -EXPLAIN QUERY TREE SELECT 1; +EXPLAIN QUERY TREE run_passes = 0 SELECT 1; SELECT '--'; @@ -13,7 +13,7 @@ CREATE TABLE test_table INSERT INTO test_table VALUES (0, 'Value'); -EXPLAIN QUERY TREE SELECT id, value FROM test_table; +EXPLAIN QUERY TREE run_passes = 0 SELECT id, value FROM test_table; SELECT '--'; @@ -21,7 +21,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT id, value FROM test_table; SELECT '--'; -EXPLAIN QUERY TREE SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table; +EXPLAIN QUERY TREE run_passes = 0 SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table; SELECT '--'; @@ -29,7 +29,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT arrayMap(x -> x + 1, [1, 2, 3]) FROM te SELECT '--'; -EXPLAIN QUERY TREE WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; +EXPLAIN QUERY TREE run_passes = 0 WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; SELECT '--'; diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index cf5ca15adeb..0f1b4f638cb 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,7 +1,7 @@ -- Tags: no-backward-compatibility-check drop table if exists test_02381; -create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b); +create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks=false, compress_primary_key=false; insert into test_02381 select number, number * 10 from system.numbers limit 1000000; drop table if exists test_02381_compress; diff --git a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference index 7f79a172f4b..083f0f69dc8 100644 --- a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference +++ b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference @@ -3,7 +3,7 @@ SET enable_filesystem_cache_on_write_operations=0; SYSTEM DROP FILESYSTEM CACHE; DROP TABLE IF EXISTS nopers; -CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES nopers; INSERT INTO nopers SELECT number, toString(number) FROM numbers(10); SELECT * FROM nopers FORMAT Null; @@ -22,7 +22,7 @@ ORDER BY file, cache, size; data.bin 0 114 data.mrk3 0 80 DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; @@ -47,7 +47,7 @@ data.bin 0 746 data.mrk3 0 80 data.mrk3 0_persistent 80 DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test2 (key UInt32, value String) 
Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test2; INSERT INTO test2 SELECT number, toString(number) FROM numbers(100000); SELECT * FROM test2 FORMAT Null; diff --git a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql index d7171de48ad..6486840602e 100644 --- a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql +++ b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql @@ -7,7 +7,7 @@ SET enable_filesystem_cache_on_write_operations=0; SYSTEM DROP FILESYSTEM CACHE; DROP TABLE IF EXISTS nopers; -CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES nopers; INSERT INTO nopers SELECT number, toString(number) FROM numbers(10); @@ -26,7 +26,7 @@ ON data_paths.cache_path = caches.cache_path ORDER BY file, cache, size; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -49,7 +49,7 @@ ON data_paths.cache_path = caches.cache_path ORDER BY file, cache, size; DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test2; INSERT INTO test2 SELECT number, toString(number) FROM numbers(100000); diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.reference b/tests/queries/0_stateless/02404_memory_bound_merging.reference new file mode 100644 index 00000000000..47d3470ef6e --- /dev/null +++ b/tests/queries/0_stateless/02404_memory_bound_merging.reference @@ -0,0 +1,141 @@ +-- { echoOn } -- +explain pipeline select a from remote(test_cluster_two_shards, currentDatabase(), t) group by a; +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 2 → 1 + (Union) + (Aggregating) + SortingAggregatedForMemoryBoundMergingTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 4 → 1 + AggregatingInOrderTransform × 4 + (Expression) + ExpressionTransform × 4 + (ReadFromMergeTree) + MergeTreeInOrder × 4 0 → 1 + (ReadFromRemote) +select a from remote(test_cluster_two_shards, currentDatabase(), t) group by a order by a limit 5 offset 100500; +100500 +100501 +100502 +100503 +100504 +explain pipeline select a from remote(test_cluster_two_shards, currentDatabase(), dist_t) group by a; +(Expression) +ExpressionTransform × 4 + 
(MergingAggregated) + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 2 → 1 + (Union) + (MergingAggregated) + SortingAggregatedForMemoryBoundMergingTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 2 → 1 + (Union) + (Aggregating) + SortingAggregatedForMemoryBoundMergingTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 4 → 1 + AggregatingInOrderTransform × 4 + (Expression) + ExpressionTransform × 4 + (ReadFromMergeTree) + MergeTreeInOrder × 4 0 → 1 + (ReadFromRemote) + (ReadFromRemote) +select a from remote(test_cluster_two_shards, currentDatabase(), dist_t) group by a order by a limit 5 offset 100500; +100500 +100501 +100502 +100503 +100504 +1 +-- { echoOn } -- +explain pipeline select a, count() from dist_t_different_dbs group by a order by a limit 5 offset 500; +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + MergingSortedTransform 4 → 1 + MergeSortingTransform × 4 + LimitsCheckingTransform × 4 + PartialSortingTransform × 4 + (Expression) + ExpressionTransform × 4 + (MergingAggregated) + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 2 → 1 + (Union) + (Aggregating) + SortingAggregatedForMemoryBoundMergingTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 4 → 1 + AggregatingInOrderTransform × 4 + (Expression) + ExpressionTransform × 4 + (ReadFromMergeTree) + MergeTreeInOrder × 4 0 → 1 + (ReadFromRemote) +select a, count() from dist_t_different_dbs group by a order by a limit 5 offset 500; +500 2000 +501 2000 +502 2000 +503 2000 +504 2000 +select a, count() from dist_t_different_dbs group by a, b order by a limit 5 offset 500; +500 2000 +501 2000 +502 2000 +503 2000 +504 2000 +-- { echoOn } -- +explain pipeline select a from dist_pr_t group by a order by a limit 5 offset 500; +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + MergingSortedTransform 4 → 1 + MergeSortingTransform × 4 + LimitsCheckingTransform × 4 + PartialSortingTransform × 4 + (Expression) + ExpressionTransform × 4 + (MergingAggregated) + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 3 → 1 + (Union) + (Aggregating) + SortingAggregatedForMemoryBoundMergingTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 4 → 1 + AggregatingInOrderTransform × 4 + (Expression) + ExpressionTransform × 4 + (ReadFromMergeTree) + MergeTreeInOrder × 4 0 → 1 + (ReadFromRemoteParallelReplicas) +select a, count() from dist_pr_t group by a order by a limit 5 offset 500; +500 1000 +501 1000 +502 1000 +503 1000 +504 1000 +select a, count() from dist_pr_t group by a, b order by a limit 5 offset 500; +500 1000 +501 1000 +502 1000 +503 1000 +504 1000 diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql new file mode 100644 index 00000000000..c41e2d3abae --- /dev/null +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -0,0 +1,72 @@ +-- Tags: no-parallel + +create table t(a UInt64, b UInt64) engine=MergeTree order by a; +system stop merges t; +insert into t select number, number from numbers_mt(1e6); + +set enable_memory_bound_merging_of_aggregation_results = 1; +set max_threads = 4; +set optimize_aggregation_in_order = 1; +set prefer_localhost_replica = 1; + +-- slightly different transforms will 
be generated by reading steps if we let settings randomisation to change this setting value -- +set read_in_order_two_level_merge_threshold = 1000; + +create table dist_t as t engine = Distributed(test_cluster_two_shards, currentDatabase(), t, a % 2); + +-- { echoOn } -- +explain pipeline select a from remote(test_cluster_two_shards, currentDatabase(), t) group by a; + +select a from remote(test_cluster_two_shards, currentDatabase(), t) group by a order by a limit 5 offset 100500; + +explain pipeline select a from remote(test_cluster_two_shards, currentDatabase(), dist_t) group by a; + +select a from remote(test_cluster_two_shards, currentDatabase(), dist_t) group by a order by a limit 5 offset 100500; + +-- { echoOff } -- + +set aggregation_in_order_max_block_bytes = '1Mi'; +set max_block_size = 500; +-- actual block size might be slightly bigger than the limit -- +select max(bs) < 70000 from (select avg(a), max(blockSize()) as bs from remote(test_cluster_two_shards, currentDatabase(), t) group by a); + +-- beautiful case when we have different sorting key definitions in tables involved in distributed query => different plans => different sorting properties of local aggregation results -- +create database if not exists shard_1; +create table t_different_dbs(a UInt64, b UInt64) engine = MergeTree order by a; +create table shard_1.t_different_dbs(a UInt64, b UInt64) engine = MergeTree order by tuple(); + +insert into t_different_dbs select number % 1000, number % 1000 from numbers_mt(1e6); +insert into shard_1.t_different_dbs select number % 1000, number % 1000 from numbers_mt(1e6); + +create table dist_t_different_dbs as t engine = Distributed(test_cluster_two_shards_different_databases_with_local, '', t_different_dbs); + +-- { echoOn } -- +explain pipeline select a, count() from dist_t_different_dbs group by a order by a limit 5 offset 500; + +select a, count() from dist_t_different_dbs group by a order by a limit 5 offset 500; +select a, count() from dist_t_different_dbs group by a, b order by a limit 5 offset 500; + +-- { echoOff } -- + +set allow_experimental_parallel_reading_from_replicas = 1; +set max_parallel_replicas = 3; +set use_hedged_requests = 0; + +create table pr_t(a UInt64, b UInt64) engine=MergeTree order by a; +insert into pr_t select number % 1000, number % 1000 from numbers_mt(1e6); +create table dist_pr_t as pr_t engine = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), pr_t); + +-- { echoOn } -- +explain pipeline select a from dist_pr_t group by a order by a limit 5 offset 500; + +select a, count() from dist_pr_t group by a order by a limit 5 offset 500; +select a, count() from dist_pr_t group by a, b order by a limit 5 offset 500; + +-- { echoOff } -- + +drop table dist_pr_t; +drop table dist_t_different_dbs; +drop table shard_1.t_different_dbs; +drop table t_different_dbs; +drop table dist_t; +drop table t; diff --git a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference index bb8c8c2228a..2277e19cf25 100644 --- a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference @@ -3,7 +3,6 @@ clusterAllReplicas dictionary executable file -format generateRandom input jdbc diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference 
b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 040a8c8d317..34180020680 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -322,7 +322,7 @@ farmHash64 file filesystemAvailable filesystemCapacity -filesystemFree +filesystemUnreserved finalizeAggregation firstSignificantSubdomainCustom firstSignificantSubdomainCustomRFC diff --git a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference new file mode 100644 index 00000000000..a89ce339f6c --- /dev/null +++ b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference @@ -0,0 +1,51 @@ +concurrent_drop_after +tx11 3 +concurrent_drop_before +tx21 3 +UNKNOWN_TABLE +concurrent_insert +2 +all_1_1_1 0 +all_2_2_1 0 +all_3_3_1 0 +all_4_4_1 0 +all_5_5_0 1 +all_6_6_1 0 +concurrent_drop_part_before +SERIALIZATION_ERROR +INVALID_TRANSACTION +1 +3 +all_1_1_0 1 +all_2_2_1 0 +all_3_3_0 1 +read_from_snapshot +tx51 3 +tx51 3 +tx52 0 +tx51 3 +0 +concurrent_drop_part_after +NO_SUCH_DATA_PART +INVALID_TRANSACTION +all_1_1_1 0 +all_2_2_1 0 +all_3_3_1 0 +NewPart all_1_1_0 +NewPart all_1_1_1 +NewPart all_2_2_0 +NewPart all_2_2_1 +NewPart all_3_3_0 +NewPart all_3_3_1 +concurrent_truncate_notx_after +tx71 3 +tx71 0 +0 +concurrent_truncate_notx_before +tx81 3 +NO_SUCH_DATA_PART +INVALID_TRANSACTION +INVALID_TRANSACTION +0 +concurrent_rollback_truncate +3 diff --git a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.sh b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.sh new file mode 100755 index 00000000000..b1e8500a4d4 --- /dev/null +++ b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.sh @@ -0,0 +1,205 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-replicated-database, no-ordinary-database, long + +set -e -o pipefail + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./transactions.lib +. 
"$CURDIR"/transactions.lib + + +function reset_table() +{ + table=${1:-"tt"} + $CLICKHOUSE_CLIENT -q "drop table if exists $table" + $CLICKHOUSE_CLIENT -q "create table $table (n int) engine=MergeTree order by tuple()" + + # In order to preserve parts names merges have to be disabled + $CLICKHOUSE_CLIENT -q "system stop merges $table" + + $CLICKHOUSE_CLIENT -q "insert into $table values (1)" # inserts all_1_1_0 + $CLICKHOUSE_CLIENT -q "insert into $table values (2)" # inserts all_2_2_0 + $CLICKHOUSE_CLIENT -q "insert into $table values (3)" # inserts all_3_3_0 +} + +function concurrent_drop_after() +{ + echo "concurrent_drop_after" + + reset_table + + tx 11 "begin transaction" + tx 11 "select count() from tt" + tx 11 "truncate table tt" + $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table tt" + tx 11 "commit" +} + +concurrent_drop_after + +function concurrent_drop_before() +{ + echo "concurrent_drop_before" + + reset_table + + tx 21 "begin transaction" + tx 21 "select count() from tt" + $CLICKHOUSE_CLIENT -q "drop table tt" + tx 21 "truncate table tt" | grep -Eo "UNKNOWN_TABLE" | uniq + tx 21 "rollback" +} + +concurrent_drop_before + +function concurrent_insert() +{ + echo "concurrent_insert" + + reset_table + + tx 31 "begin transaction" + tx 32 "begin transaction" + tx 31 "insert into tt values (1)" # inserts all_4_4_0 + tx 32 "insert into tt values (2)" # inserts all_5_5_0 + tx 31 "insert into tt values (3)" # inserts all_6_6_0 + tx 31 "truncate table tt" # creates all_1_4_1 all_6_6_1 + tx 31 "commit" + tx 32 "commit" + + $CLICKHOUSE_CLIENT -q "select n from tt order by n" + $CLICKHOUSE_CLIENT -q "select name, rows from system.parts + where table='tt' and database=currentDatabase() and active + order by name" +} + +concurrent_insert + +function concurrent_drop_part_before() +{ + echo "concurrent_drop_part_before" + + reset_table + + tx 41 "begin transaction" + tx 42 "begin transaction" + tx 42 "alter table tt drop part 'all_2_2_0'" + tx 41 "truncate table tt" | grep -Eo "SERIALIZATION_ERROR" | uniq + tx 41 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq + tx 42 "commit" + + $CLICKHOUSE_CLIENT -q "select n from tt order by n" + $CLICKHOUSE_CLIENT -q "select name, rows from system.parts + where table='tt' and database=currentDatabase() and active + order by name" + + reset_table +} + +concurrent_drop_part_before + +function read_from_snapshot() +{ + echo "read_from_snapshot" + + reset_table + + tx 51 "begin transaction" + tx 51 "select count() from tt" + tx 52 "begin transaction" + tx 52 "truncate table tt" + tx 51 "select count() from tt" + tx 52 "select count() from tt" + tx 52 "commit" + tx 51 "select count() from tt" + tx 51 "commit" + + $CLICKHOUSE_CLIENT -q "select count() from tt" +} + +read_from_snapshot + + +function concurrent_drop_part_after() +{ + echo "concurrent_drop_part_after" + + reset_table drop_part_after_table + + tx 61 "begin transaction" + tx 62 "begin transaction" + tx 61 "truncate table drop_part_after_table" + tx 62 "alter table drop_part_after_table drop part 'all_2_2_0'" | grep -Eo "NO_SUCH_DATA_PART" | uniq + tx 61 "commit" + tx 62 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq + + $CLICKHOUSE_CLIENT -q "select n from drop_part_after_table order by n" + $CLICKHOUSE_CLIENT -q "select name, rows from system.parts + where table='drop_part_after_table' and database=currentDatabase() and active + order by name" + $CLICKHOUSE_CLIENT -q "system flush logs" + $CLICKHOUSE_CLIENT -q "select event_type, part_name from 
system.part_log + where table='drop_part_after_table' and database=currentDatabase() + order by part_name" +} + +concurrent_drop_part_after + +function concurrent_truncate_notx_after() +{ + echo "concurrent_truncate_notx_after" + + reset_table + + tx 71 "begin transaction" + tx 71 "select count() from tt" + tx 71 "alter table tt drop part 'all_2_2_0'" + $CLICKHOUSE_CLIENT -q "truncate table tt" + # return 0, since truncate was out of transaction + # it would be better if exception raised + tx 71 "select count() from tt" + tx 71 "commit" + + $CLICKHOUSE_CLIENT -q "select count() from tt" +} + +concurrent_truncate_notx_after + +function concurrent_truncate_notx_before() +{ + echo "concurrent_truncate_notx_before" + + reset_table + + tx 81 "begin transaction" + tx 81 "select count() from tt" + $CLICKHOUSE_CLIENT -q "truncate table tt" + tx 81 "alter table tt drop part 'all_2_2_0'" | grep -Eo "NO_SUCH_DATA_PART" | uniq + tx 81 "select count() from tt" | grep -Eo "INVALID_TRANSACTION" | uniq + tx 81 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq + + $CLICKHOUSE_CLIENT -q "select count() from tt" +} + +concurrent_truncate_notx_before + +function concurrent_rollback_truncate() +{ + echo "concurrent_rollback_truncate" + + reset_table + + tx 91 "begin transaction" + tx 92 "begin transaction" + tx 91 "truncate table tt" + tx_async 91 "rollback" + tx 92 "truncate table tt" | grep -vwe "PART_IS_TEMPORARILY_LOCKED" -vwe "SERIALIZATION_ERROR" ||: + tx 92 "rollback" + tx_wait 91 + + $CLICKHOUSE_CLIENT -q "select count() from tt" +} + +concurrent_rollback_truncate diff --git a/tests/queries/0_stateless/02421_truncate_isolation_with_mutations.reference b/tests/queries/0_stateless/02421_truncate_isolation_with_mutations.reference new file mode 100644 index 00000000000..5890f1120db --- /dev/null +++ b/tests/queries/0_stateless/02421_truncate_isolation_with_mutations.reference @@ -0,0 +1,60 @@ +concurrent_delete_before +tx11 41 3 +tx11 41 3 +SERIALIZATION_ERROR +tx12 42 1 +2 +4 +concurrent_delete_after +tx21 111 3 +tx22 112 3 +UNFINISHED +concurrent_delete_rollback +tx31 3 +tx31 3 +tx32 1 +tx31 3 +0 +concurrent_optimize_table_not_start +tx41 4 +3 all_1_1_0 +1 all_2_2_0 +concurrent_optimize_table +tx43 5 +SERIALIZATION_ERROR +INVALID_TRANSACTION +5 all_1_2_1 +1 all_3_3_0 +concurrent_optimize_table_before +3 all_1_1_0 +drop_parts_which_already_outdated +tx69 before optimize 3 all_1_1_6 +tx69 before optimize 1 all_2_2_0 +tx69 after optimize 3 all_1_1_6 +tx69 after optimize 1 all_2_2_0 +SERIALIZATION_ERROR +at the end 4 all_1_2_7 +unable_drop_one_part_which_outdated_but_visible +tx79 before optimize 3 all_1_1_2 +tx79 before optimize 1 all_2_2_0 +tx79 after optimize 3 all_1_1_2 +tx79 after optimize 1 all_2_2_0 +NO_SUCH_DATA_PART +at the end 3 all_1_1_2 +at the end 1 all_2_2_0 +drop_one_part_which_outdated_and_reverted +tx89 before optimize 3 all_1_1_1 +tx89 before optimize 1 all_2_2_0 +tx89 after optimize 3 all_1_1_1 +tx89 after optimize 1 all_2_2_0 +tx89 after rollback 3 all_1_1_1 +tx89 after rollback 1 all_2_2_0 +at the end 3 all_1_1_1 +drop_one_part_which_outdated_and_reverted_no_name_intersection +tx99 before optimize 3 all_1_1_0 +tx99 before optimize 1 all_2_2_0 +tx99 after optimize 3 all_1_1_0 +tx99 after optimize 1 all_2_2_0 +tx99 after rollback 3 all_1_1_0 +tx99 after rollback 1 all_2_2_0 +at the end 3 all_1_1_0 diff --git a/tests/queries/0_stateless/02421_truncate_isolation_with_mutations.sh b/tests/queries/0_stateless/02421_truncate_isolation_with_mutations.sh new file mode 100755 index 
00000000000..fabc9eab140 --- /dev/null +++ b/tests/queries/0_stateless/02421_truncate_isolation_with_mutations.sh @@ -0,0 +1,272 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-replicated-database, no-ordinary-database, long + +set -e -o pipefail + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./transactions.lib +. "$CURDIR"/transactions.lib +# shellcheck source=./parts.lib +. "$CURDIR"/parts.lib + +function reset_table() +{ + table=${1:-"tt"} + settings=${2:-""} + $CLICKHOUSE_CLIENT -q "drop table if exists $table" + $CLICKHOUSE_CLIENT -q "create table $table (n int) engine=MergeTree order by tuple() $settings" + + $CLICKHOUSE_CLIENT -q "insert into $table values (1), (2), (3)" # inserts all_1_1_0 +} + +function concurrent_delete_before() +{ + $CLICKHOUSE_CLIENT -q "select 'concurrent_delete_before'" + + reset_table tt + + tx 11 "begin transaction" + tx 11 "select 41, count() from tt" + tx 12 "begin transaction" + tx 12 "alter table tt delete where n%2=1" + tx 11 "select 41, count() from tt" + tx 11 "truncate table tt" | grep -Eo "SERIALIZATION_ERROR" | uniq + tx 12 "select 42, count() from tt" + tx 11 "rollback" + tx 12 "insert into tt values (4)" + tx 12 "commit" + + $CLICKHOUSE_CLIENT -q "select n from tt order by n" +} + +concurrent_delete_before + +function concurrent_delete_after() +{ + $CLICKHOUSE_CLIENT -q "select 'concurrent_delete_after'" + + reset_table tt + + tx 21 "begin transaction" + tx 22 "begin transaction" + tx 21 "select 111, count() from tt" + tx 21 "truncate table tt" + tx 22 "select 112, count() from tt" + tx 22 "alter table tt delete where n%2=1" | grep -Eo "UNFINISHED" | uniq + tx 21 "commit" + tx 22 "rollback" + + $CLICKHOUSE_CLIENT -q "select n from tt order by n" +} + +concurrent_delete_after + +function concurrent_delete_rollback() +{ + $CLICKHOUSE_CLIENT -q "select 'concurrent_delete_rollback'" + + reset_table tt + + tx 31 "begin transaction" + tx 31 "select count() from tt" + tx 32 "begin transaction" + tx 32 "alter table tt delete where n%2=1" + tx 31 "select count() from tt" + tx 32 "select count() from tt" + tx 31 "select count() from tt" + tx 32 "rollback" + tx 31 "truncate table tt" + tx 31 "commit" + + $CLICKHOUSE_CLIENT -q "select count() from tt" +} + +concurrent_delete_rollback + + +function concurrent_optimize_table_not_start() +{ + $CLICKHOUSE_CLIENT -q "select 'concurrent_optimize_table_not_start'" + + reset_table tt + + tx 41 "begin transaction" + tx 41 "insert into tt values (4)" # inserts all_2_2_0 + + tx 42 "begin transaction" + tx 42 "optimize table tt final" + tx 42 "commit" + + tx 41 "select count() from tt" + tx 41 "commit" + + $CLICKHOUSE_CLIENT -q "select count(), _part from tt group by _part order by _part" +} + +concurrent_optimize_table_not_start + + +function concurrent_optimize_table() +{ + $CLICKHOUSE_CLIENT -q "select 'concurrent_optimize_table'" + + reset_table tt + + $CLICKHOUSE_CLIENT -q "insert into $table values (4), (5)" # inserts all_2_2_0 + + tx 41 "begin transaction" + tx 41 "optimize table tt final" + + tx 42 "begin transaction" + tx 42 "insert into tt values (6)" # inserts all_3_3_0 + + tx 43 "begin transaction" + tx 43 "select count() from tt" + tx 43 "alter table tt drop partition id 'all'" | grep -Eo "SERIALIZATION_ERROR" | uniq + + tx 42 "commit" + tx 43 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq + tx 41 "commit" + + $CLICKHOUSE_CLIENT -q "select count(), _part from tt group by _part order by _part" +} + 
+concurrent_optimize_table + +function concurrent_optimize_table_before() +{ + $CLICKHOUSE_CLIENT -q "select 'concurrent_optimize_table_before'" + + reset_table tt + + tx 51 "begin transaction" + tx 52 "begin transaction" + tx 51 "optimize table tt final" # inserts all_1_1_1 + tx 51 "rollback" # inserts all_1_1_1 is outdated + tx 52 "alter table tt drop partition id 'all'" | grep -vwe "PART_IS_TEMPORARILY_LOCKED" ||: # conflict with all_1_1_1 + tx 52 "rollback" + + $CLICKHOUSE_CLIENT -q "select count(), _part from tt group by _part order by _part" +} + +concurrent_optimize_table_before + +function drop_parts_which_already_outdated() +{ + $CLICKHOUSE_CLIENT -q "select 'drop_parts_which_already_outdated'" + + reset_table tt "settings old_parts_lifetime=0" + + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_1*/" + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_2*/" + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_3*/" + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_4*/" + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_5*/" + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_6*/" + + $CLICKHOUSE_CLIENT -q "insert into $table values (4)" # inserts all_2_2_0 + + tx 69 "begin transaction" + tx 69 "select 'before optimize', count(), _part from tt group by _part order by _part" + + tx 61 "begin transaction" + tx 61 "optimize table tt final /*all_1_2_7*/" + tx 61 "commit" + + tx 62 "begin transaction" + tx 62 "optimize table tt final /*all_1_2_8*/" + + tx 69 "select 'after optimize', count(), _part from tt group by _part order by _part" + tx 69 "alter table tt drop partition id 'all'" | grep -Eo "SERIALIZATION_ERROR" | uniq + tx 69 "rollback" + + tx 62 "rollback" + + $CLICKHOUSE_CLIENT -q "select 'at the end', count(), _part from tt group by _part order by _part" +} + +drop_parts_which_already_outdated + +function unable_drop_one_part_which_outdated_but_visible() +{ + $CLICKHOUSE_CLIENT -q "select 'unable_drop_one_part_which_outdated_but_visible'" + + reset_table tt "settings old_parts_lifetime=0" + + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_1*/" + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_2*/" + + $CLICKHOUSE_CLIENT -q "insert into $table values (4)" # inserts all_2_2_0 + + tx 79 "begin transaction" + tx 79 "select 'before optimize', count(), _part from tt group by _part order by _part" + + tx 71 "begin transaction" + tx 71 "optimize table tt final /*all_1_2_3*/" + + tx 79 "select 'after optimize', count(), _part from tt group by _part order by _part" + tx 79 "alter table tt drop part 'all_2_2_0'" | grep -Eo "NO_SUCH_DATA_PART" | uniq + tx 79 "rollback" + + tx 71 "rollback" + + $CLICKHOUSE_CLIENT -q "select 'at the end', count(), _part from tt group by _part order by _part" +} + +unable_drop_one_part_which_outdated_but_visible + +function drop_one_part_which_outdated_and_reverted() +{ + $CLICKHOUSE_CLIENT -q "select 'drop_one_part_which_outdated_and_reverted'" + + reset_table tt "settings old_parts_lifetime=0" + + $CLICKHOUSE_CLIENT -q "optimize table tt final /*all_1_1_1*/" + + $CLICKHOUSE_CLIENT -q "insert into $table values (4)" # inserts all_2_2_0 + + tx 89 "begin transaction" + tx 89 "select 'before optimize', count(), _part from tt group by _part order by _part" + + tx 81 "begin transaction" + tx 81 "optimize table tt final /*all_1_2_2*/" + + tx 89 "select 'after optimize', count(), _part from tt group by _part order by _part" + tx 81 "rollback" + + tx 89 "select 'after rollback', count(), _part from tt 
group by _part order by _part" + tx 89 "alter table tt drop part 'all_2_2_0'" + tx 89 "commit" + + $CLICKHOUSE_CLIENT -q "select 'at the end', count(), _part from tt group by _part order by _part" +} + +drop_one_part_which_outdated_and_reverted + +function drop_one_part_which_outdated_and_reverted_no_name_intersection() +{ + $CLICKHOUSE_CLIENT -q "select 'drop_one_part_which_outdated_and_reverted_no_name_intersection'" + + reset_table tt "settings old_parts_lifetime=0" + + $CLICKHOUSE_CLIENT -q "insert into $table values (4)" # inserts all_2_2_0 + + tx 99 "begin transaction" + tx 99 "select 'before optimize', count(), _part from tt group by _part order by _part" + + tx 91 "begin transaction" + tx 91 "optimize table tt final /*all_1_2_1*/" + + tx 99 "select 'after optimize', count(), _part from tt group by _part order by _part" + tx 91 "rollback" + + tx 99 "select 'after rollback', count(), _part from tt group by _part order by _part" + tx 99 "alter table tt drop part 'all_2_2_0'" + tx 99 "commit" + + $CLICKHOUSE_CLIENT -q "select 'at the end', count(), _part from tt group by _part order by _part" +} + +drop_one_part_which_outdated_and_reverted_no_name_intersection diff --git a/tests/queries/0_stateless/02422_insert_different_granularity.reference b/tests/queries/0_stateless/02422_insert_different_granularity.reference new file mode 100644 index 00000000000..f4ca728d701 --- /dev/null +++ b/tests/queries/0_stateless/02422_insert_different_granularity.reference @@ -0,0 +1,4 @@ +=== adaptive granularity: table one -; table two + === +=== adaptive granularity: table one -; table two - === +=== adaptive granularity: table one +; table two + === +=== adaptive granularity: table one +; table two - === diff --git a/tests/queries/0_stateless/02422_insert_different_granularity.sql b/tests/queries/0_stateless/02422_insert_different_granularity.sql new file mode 100644 index 00000000000..e122cd134fe --- /dev/null +++ b/tests/queries/0_stateless/02422_insert_different_granularity.sql @@ -0,0 +1,81 @@ +SELECT '=== adaptive granularity: table one -; table two + ==='; + +DROP TABLE IF EXISTS table_one; +CREATE TABLE table_one (id UInt64, value UInt64) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 0, min_bytes_for_wide_part = 100; + +DROP TABLE IF EXISTS table_two; +CREATE TABLE table_two (id UInt64, value UInt64) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 1024, min_bytes_for_wide_part = 100; + +INSERT INTO table_one SELECT intDiv(number, 10), number FROM numbers(100); + +ALTER TABLE table_two REPLACE PARTITION 0 FROM table_one; + +SELECT '=== adaptive granularity: table one -; table two - ==='; + +DROP TABLE IF EXISTS table_one; + +CREATE TABLE table_one (id UInt64, value UInt64) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 0, min_bytes_for_wide_part = 100; + +DROP TABLE IF EXISTS table_two; + +CREATE TABLE table_two (id UInt64, value UInt64) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 0, min_bytes_for_wide_part = 100; + +INSERT INTO table_one SELECT intDiv(number, 10), number FROM numbers(100); + +ALTER TABLE table_two REPLACE PARTITION 0 FROM table_one; + +SELECT '=== adaptive granularity: table one +; table two + ==='; + +DROP TABLE IF EXISTS table_one; +CREATE TABLE table_one (id UInt64, value UInt64) +ENGINE = MergeTree
+PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 1024, min_bytes_for_wide_part = 100; + +DROP TABLE IF EXISTS table_two; +CREATE TABLE table_two (id UInt64, value UInt64) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 1024, min_bytes_for_wide_part = 100; + +INSERT INTO table_one SELECT intDiv(number, 10), number FROM numbers(100); + +ALTER TABLE table_two REPLACE PARTITION 0 FROM table_one; + +SELECT '=== adaptive granularity: table one +; table two - ==='; + +DROP TABLE IF EXISTS table_one; +CREATE TABLE table_one (id UInt64, value UInt64) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 1024, min_bytes_for_wide_part = 100; + +DROP TABLE IF EXISTS table_two; +CREATE TABLE table_two (id UInt64, value UInt64) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS index_granularity = 8192, index_granularity_bytes = 0, min_bytes_for_wide_part = 100; + +INSERT INTO table_one SELECT intDiv(number, 10), number FROM numbers(100); + +ALTER TABLE table_two REPLACE PARTITION 0 FROM table_one; -- { serverError 36 } diff --git a/tests/queries/0_stateless/02423_drop_memory_parts.reference b/tests/queries/0_stateless/02423_drop_memory_parts.reference new file mode 100644 index 00000000000..d69a5f07a05 --- /dev/null +++ b/tests/queries/0_stateless/02423_drop_memory_parts.reference @@ -0,0 +1,14 @@ +init state +30 +0_1_1_0 InMemory 10 1 +1_2_2_0 InMemory 10 1 +2_3_3_0 InMemory 10 1 +drop part 0 +20 +1_2_2_0 InMemory 10 1 +2_3_3_0 InMemory 10 1 +detach table +attach table +20 +1_2_2_0 InMemory 10 1 +2_3_3_0 InMemory 10 1 diff --git a/tests/queries/0_stateless/02423_drop_memory_parts.sql b/tests/queries/0_stateless/02423_drop_memory_parts.sql new file mode 100644 index 00000000000..0d42847f6e5 --- /dev/null +++ b/tests/queries/0_stateless/02423_drop_memory_parts.sql @@ -0,0 +1,38 @@ +DROP TABLE IF EXISTS table_in_memory; + +CREATE TABLE table_in_memory +( + `id` UInt64, + `value` UInt64 +) +ENGINE = MergeTree +PARTITION BY id +ORDER BY value +SETTINGS min_bytes_for_wide_part=1000, min_bytes_for_compact_part=900; + +SELECT 'init state'; +INSERT INTO table_in_memory SELECT intDiv(number, 10), number FROM numbers(30); + +SELECT count() FROM table_in_memory; +SELECT name, part_type, rows, active from system.parts +WHERE table='table_in_memory' AND database=currentDatabase(); + +SELECT 'drop part 0'; +ALTER TABLE table_in_memory DROP PARTITION 0; + +SELECT count() FROM table_in_memory; +SELECT name, part_type, rows, active from system.parts +WHERE table='table_in_memory' AND database=currentDatabase() AND active; + +SELECT 'detach table'; +DETACH TABLE table_in_memory; + +SELECT name, part_type, rows, active from system.parts +WHERE table='table_in_memory' AND database=currentDatabase(); + +SELECT 'attach table'; +ATTACH TABLE table_in_memory; + +SELECT count() FROM table_in_memory; +SELECT name, part_type, rows, active from system.parts +WHERE table='table_in_memory' AND database=currentDatabase(); diff --git a/tests/queries/0_stateless/02452_check_low_cardinality.reference b/tests/queries/0_stateless/02452_check_low_cardinality.reference new file mode 100644 index 00000000000..700778e02c7 --- /dev/null +++ b/tests/queries/0_stateless/02452_check_low_cardinality.reference @@ -0,0 +1,7 @@ +('hi','hello','hola','see you, bye, bye')
+('hi\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','hello\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','hola\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','see you, bye, bye\0\0\0') +(11,0,0,0) +(11,2222,0,0) +(11,2222,33333333,0) +(11,2222,33333333,4444444444444444) +('2d49dc6e-ddce-4cd0-afb8-790956df54c4','2d49dc6e-ddce-4cd0-afb8-790956df54c3','2d49dc6e-ddce-4cd0-afb8-790956df54c1','2d49dc6e-ddce-4cd0-afb8-790956df54c1') diff --git a/tests/queries/0_stateless/02452_check_low_cardinality.sql b/tests/queries/0_stateless/02452_check_low_cardinality.sql new file mode 100644 index 00000000000..e9cb8c800c7 --- /dev/null +++ b/tests/queries/0_stateless/02452_check_low_cardinality.sql @@ -0,0 +1,55 @@ +-- Tags: no-fasttest +DROP TABLE IF EXISTS test_low_cardinality_string; +DROP TABLE IF EXISTS test_low_cardinality_uuid; +DROP TABLE IF EXISTS test_low_cardinality_int; +CREATE TABLE test_low_cardinality_string (data String) ENGINE MergeTree ORDER BY data; +CREATE TABLE test_low_cardinality_uuid (data String) ENGINE MergeTree ORDER BY data; +CREATE TABLE test_low_cardinality_int (data String) ENGINE MergeTree ORDER BY data; +INSERT INTO test_low_cardinality_string (data) VALUES ('{"a": "hi", "b": "hello", "c": "hola", "d": "see you, bye, bye"}'); +INSERT INTO test_low_cardinality_int (data) VALUES ('{"a": 11, "b": 2222, "c": 33333333, "d": 4444444444444444}'); +INSERT INTO test_low_cardinality_uuid (data) VALUES ('{"a": "2d49dc6e-ddce-4cd0-afb8-790956df54c4", "b": "2d49dc6e-ddce-4cd0-afb8-790956df54c3", "c": "2d49dc6e-ddce-4cd0-afb8-790956df54c1", "d": "2d49dc6e-ddce-4cd0-afb8-790956df54c1"}'); +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(String), + b LowCardinality(String), + c LowCardinality(String), + d LowCardinality(String) + )') AS json FROM test_low_cardinality_string; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(FixedString(20)), + b LowCardinality(FixedString(20)), + c LowCardinality(FixedString(20)), + d LowCardinality(FixedString(20)) + )') AS json FROM test_low_cardinality_string; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int8), + b LowCardinality(Int8), + c LowCardinality(Int8), + d LowCardinality(Int8) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int16), + b LowCardinality(Int16), + c LowCardinality(Int16), + d LowCardinality(Int16) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int32), + b LowCardinality(Int32), + c LowCardinality(Int32), + d LowCardinality(Int32) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(Int64), + b LowCardinality(Int64), + c LowCardinality(Int64), + d LowCardinality(Int64) + )') AS json FROM test_low_cardinality_int; +SELECT JSONExtract(data, 'Tuple( + a LowCardinality(UUID), + b LowCardinality(UUID), + c LowCardinality(UUID), + d LowCardinality(UUID) + )') AS json FROM test_low_cardinality_uuid; +DROP TABLE test_low_cardinality_string; +DROP TABLE test_low_cardinality_uuid; +DROP TABLE test_low_cardinality_int; diff --git a/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.reference b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.reference new file mode 100644 index 00000000000..3a528a24821 --- /dev/null +++ b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.reference @@ -0,0 +1 @@ +('{"b":{"c":1,"d":"str"}}\0') diff --git a/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql 
b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql new file mode 100644 index 00000000000..449713d396f --- /dev/null +++ b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest +DROP TABLE IF EXISTS test_fixed_string_nested_json; +CREATE TABLE test_fixed_string_nested_json (data String) ENGINE MergeTree ORDER BY data; +INSERT INTO test_fixed_string_nested_json (data) VALUES ('{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); +SELECT JSONExtract(data, 'Tuple(a FixedString(24))') AS json FROM test_fixed_string_nested_json; +DROP TABLE test_fixed_string_nested_json; \ No newline at end of file diff --git a/tests/queries/0_stateless/02457_filesystem_function.reference b/tests/queries/0_stateless/02457_filesystem_function.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02457_filesystem_function.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02457_filesystem_function.sql b/tests/queries/0_stateless/02457_filesystem_function.sql new file mode 100644 index 00000000000..d8322bc65b5 --- /dev/null +++ b/tests/queries/0_stateless/02457_filesystem_function.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +select filesystemCapacity('s3_disk') >= filesystemAvailable('s3_disk') and filesystemAvailable('s3_disk') >= filesystemUnreserved('s3_disk'); +select filesystemCapacity('default') >= filesystemAvailable('default') and filesystemAvailable('default') >= 0 and filesystemUnreserved('default') >= 0; + +select filesystemCapacity('__un_exists_disk'); -- { serverError UNKNOWN_DISK } diff --git a/tests/queries/0_stateless/02459_group_by_all.reference b/tests/queries/0_stateless/02459_group_by_all.reference new file mode 100644 index 00000000000..7c5ccbd8fbf --- /dev/null +++ b/tests/queries/0_stateless/02459_group_by_all.reference @@ -0,0 +1,44 @@ +abc1 1 +abc2 1 +abc3 1 +abc4 1 +abc 4 +abc ab +abc ab +abc ab +abc bc +abc bc +abc a +abc a +abc a +abc a +abc a +abc a +abc a +abc a +1 abc a +1 abc a +1 abc a +1 abc a +abc1 1 +abc2 1 +abc3 1 +abc4 1 +abc 4 +abc ab +abc ab +abc ab +abc bc +abc bc +abc a +abc a +abc a +abc a +abc a +abc a +abc a +abc a +1 abc a +1 abc a +1 abc a +1 abc a diff --git a/tests/queries/0_stateless/02459_group_by_all.sql b/tests/queries/0_stateless/02459_group_by_all.sql new file mode 100644 index 00000000000..4f08ee331a4 --- /dev/null +++ b/tests/queries/0_stateless/02459_group_by_all.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS group_by_all; + +CREATE TABLE group_by_all +( + a String, + b int, + c int +) +engine = Memory; + +insert into group_by_all values ('abc1', 1, 1), ('abc2', 1, 1), ('abc3', 1, 1), ('abc4', 1, 1); + +select a, count(b) from group_by_all group by all order by a; +select substring(a, 1, 3), count(b) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(a, 
1, count(b)) from group_by_all group by all; +select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all; + +SET allow_experimental_analyzer = 1; + +select a, count(b) from group_by_all group by all order by a; +select substring(a, 1, 3), count(b) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all; +select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all; diff --git a/tests/queries/0_stateless/02470_mutation_sync_race.reference b/tests/queries/0_stateless/02470_mutation_sync_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02470_mutation_sync_race.sh b/tests/queries/0_stateless/02470_mutation_sync_race.sh new file mode 100755 index 00000000000..6c259e46cb1 --- /dev/null +++ b/tests/queries/0_stateless/02470_mutation_sync_race.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "drop table if exists src;" +$CLICKHOUSE_CLIENT -q "create table src(A UInt64) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src1', '1') order by tuple() SETTINGS min_bytes_for_wide_part=0;" +$CLICKHOUSE_CLIENT -q "insert into src values (0)" + +function thread() +{ + for i in $(seq 1000); do + $CLICKHOUSE_CLIENT -q "alter table src detach partition tuple()" + $CLICKHOUSE_CLIENT -q "alter table src attach partition tuple()" + $CLICKHOUSE_CLIENT -q "alter table src update A = ${i} where 1 settings mutations_sync=2" + $CLICKHOUSE_CLIENT -q "select throwIf(A != ${i}) from src format Null" + done +} + +export -f thread; + +TIMEOUT=30 + +timeout $TIMEOUT bash -c thread || true diff --git a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.reference b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.reference new file mode 100644 index 00000000000..73eab066599 --- /dev/null +++ b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.reference @@ -0,0 +1,2 @@ +('{"b":{"c":1,"d":"str"}}','','','') +('{"b":{"c":1,"d":"str"}}','','','') diff --git a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql new file mode 100644 index 00000000000..664c52e772f --- /dev/null +++ b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql @@ -0,0 +1,3 @@ +-- Tags: no-fasttest +SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a LowCardinality(String), b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); +SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a String, b LowCardinality(String), c 
LowCardinality(String), d LowCardinality(String))'); diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index c1b31eeecbb..6c1c32822d3 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -27,7 +27,7 @@ with client(name="client>", log=log) as client1: ) client1.expect(prompt) client1.send(f"INSERT INTO test.infile_progress FROM INFILE '{filename}'") - client1.expect("Progress: 0.00 rows, 10.00 B.*\)") + client1.expect("Progress: 5.00 rows, 30.00 B.*\)") client1.expect(prompt) # send Ctrl-C diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference new file mode 100644 index 00000000000..783d12fcf1a --- /dev/null +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference @@ -0,0 +1,10 @@ +\0\0\0\0\0\0\0\0\0\0\0 +{"a":123456} +\0\0\0\0\0 +123456 +\0\0\0\0\0 +123456 +\0\0\0\0\0 +\0\0\0\0\0 +131231 +131231 diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql new file mode 100644 index 00000000000..336dda411da --- /dev/null +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ -0,0 +1,9 @@ +-- Tags: no-fasttest +SELECT JSONExtract('{"a": 123456}', 'FixedString(11)'); +SELECT JSONExtract('{"a": 123456}', 'FixedString(12)'); +SELECT JSONExtract('{"a": "123456"}', 'a', 'FixedString(5)'); +SELECT JSONExtract('{"a": "123456"}', 'a', 'FixedString(6)'); +SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(5)'); +SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(6)'); +SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(5))') FROM numbers(2); +SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(6))') FROM numbers(2); diff --git a/tests/queries/0_stateless/02474_unhex_in_fix_string.reference b/tests/queries/0_stateless/02474_unhex_in_fix_string.reference new file mode 100644 index 00000000000..407a8b39948 --- /dev/null +++ b/tests/queries/0_stateless/02474_unhex_in_fix_string.reference @@ -0,0 +1 @@ +ClickHouse ClickHouse diff --git a/tests/queries/0_stateless/02474_unhex_in_fix_string.sql b/tests/queries/0_stateless/02474_unhex_in_fix_string.sql new file mode 100644 index 00000000000..288336aa4fa --- /dev/null +++ b/tests/queries/0_stateless/02474_unhex_in_fix_string.sql @@ -0,0 +1,4 @@ +drop table if exists unhex_in_fix_string_table; +create table unhex_in_fix_string_table ( dt Date, s1 FixedString(20), s2 String) engine=MergeTree partition by dt order by tuple(); +insert into unhex_in_fix_string_table values(today(), '436C69636B486F757365', '436C69636B486F757365'); +select unhex(s1), unhex(s2) from unhex_in_fix_string_table; diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.reference b/tests/queries/0_stateless/02475_bson_each_row_format.reference new file mode 100644 index 00000000000..b4a841ed3eb --- /dev/null +++ b/tests/queries/0_stateless/02475_bson_each_row_format.reference @@ -0,0 +1,252 @@ +Integers +false 0 0 0 0 0 0 0 0 +true 1 1 1 1 1 1 1 1 +true 2 2 2 2 2 2 2 2 +true 3 3 3 3 3 3 3 3 +true 4 4 4 4 4 4 4 4 +bool Nullable(Bool) +int8 Nullable(Int32) +uint8 Nullable(Int32) +int16 Nullable(Int32) +uint16 Nullable(Int32) +int32 Nullable(Int32) +uint32 Nullable(Int64) +int64 Nullable(Int64) +uint64 Nullable(Int64) +false 0 0 0 0 0 0 0 0 +true 1 1 1 1 
1 1 1 1 +true 2 2 2 2 2 2 2 2 +true 3 3 3 3 3 3 3 3 +true 4 4 4 4 4 4 4 4 +Integers conversion +1 4294967295 +1 -1 +1 65535 +1 -1 +1 255 +1 -1 +uint64 Nullable(Int64) +int64 Nullable(Int64) +4294967297 -4294967297 +Floats +0 0 +0.5 0.5 +0.6666667 0.6666666666666666 +0.75 0.75 +0.8 0.8 +float32 Nullable(Float64) +float64 Nullable(Float64) +0 0 +0.5 0.5 +0.6666666865348816 0.6666666666666666 +0.75 0.75 +0.800000011920929 0.8 +Big integers +0 0 0 0 +-10000000000000000000000 10000000000000000000000 -100000000000000000000000000000000000000000000 100000000000000000000000000000000000000000000 +-20000000000000000000000 20000000000000000000000 -200000000000000000000000000000000000000000000 200000000000000000000000000000000000000000000 +-30000000000000000000000 30000000000000000000000 -300000000000000000000000000000000000000000000 300000000000000000000000000000000000000000000 +-40000000000000000000000 40000000000000000000000 -400000000000000000000000000000000000000000000 400000000000000000000000000000000000000000000 +int128 Nullable(String) +uint128 Nullable(String) +int256 Nullable(String) +uint256 Nullable(String) +Dates +1970-01-01 1970-01-01 1970-01-01 00:00:00 1970-01-01 00:00:00.000000 +1970-01-02 1970-01-02 1970-01-01 00:00:01 1970-01-01 00:00:01.000000 +1970-01-03 1970-01-03 1970-01-01 00:00:02 1970-01-01 00:00:02.000000 +1970-01-04 1970-01-04 1970-01-01 00:00:03 1970-01-01 00:00:03.000000 +1970-01-05 1970-01-05 1970-01-01 00:00:04 1970-01-01 00:00:04.000000 +date Nullable(Int32) +date32 Nullable(Int32) +datetime Nullable(Int64) +datetime64 Nullable(DateTime64(6, \'UTC\')) +0 0 0 1970-01-01 00:00:00.000000 +1 1 1 1970-01-01 00:00:01.000000 +2 2 2 1970-01-01 00:00:02.000000 +3 3 3 1970-01-01 00:00:03.000000 +4 4 4 1970-01-01 00:00:04.000000 +Decimals +0 0 0 0 +42.422 42.424242 42.424242424242 42.424242424242424242424242 +84.844 84.848484 84.848484848484 84.848484848484848484848484 +127.266 127.272726 127.272727272726 127.272727272727272727272726 +169.688 169.696968 169.696969696968 169.696969696969696969696968 +decimal32 Nullable(Int32) +decimal64 Nullable(Int64) +decimal128 Nullable(String) +decimal256 Nullable(String) +Strings + \0\0\0\0\0 +HelloWorld b\0\0\0\0 +HelloWorldHelloWorld cc\0\0\0 +HelloWorldHelloWorldHelloWorld ddd\0\0 +HelloWorldHelloWorldHelloWorldHelloWorld eeee\0 + \0\0\0\0\0 +HelloWorld b\0\0\0\0 +HelloWorldHelloWorld cc\0\0\0 +HelloWorldHelloWorldHelloWorld ddd\0\0 +HelloWorldHelloWorldHelloWorldHelloWorld eeee\0 +str Nullable(String) +fixstr Nullable(String) + \0\0\0\0\0 +HelloWorld b\0\0\0\0 +HelloWorldHelloWorld cc\0\0\0 +HelloWorldHelloWorldHelloWorld ddd\0\0 +HelloWorldHelloWorldHelloWorldHelloWorld eeee\0 +UUID +b86d5c23-4b87-4465-8f33-4a685fa1c868 +uuid Nullable(UUID) +b86d5c23-4b87-4465-8f33-4a685fa1c868 +LowCardinality +a +b +c +a +b +lc Nullable(String) +a +b +c +a +b +Nullable +0 +\N +2 +\N +4 +0 +0 +2 +0 +4 +FAIL +null Nullable(Int64) +0 +\N +2 +\N +4 +LowCardinality(Nullable) +a +\N +c +\N +b +lc Nullable(String) +a +\N +c +\N +b +Array +[] ['Hello'] +[0] ['Hello'] +[0,1] ['Hello'] +[0,1,2] ['Hello'] +[0,1,2,3] ['Hello'] +arr1 Array(Nullable(Int64)) +arr2 Array(Nullable(String)) +[] ['Hello'] +[0] ['Hello'] +[0,1] ['Hello'] +[0,1,2] ['Hello'] +[0,1,2,3] ['Hello'] +Tuple +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +('Hello',0) +('Hello',1) +('Hello',2) +('Hello',3) +('Hello',4) +OK +OK +tuple Tuple(x Nullable(Int64), s Nullable(String)) +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +(0,'Hello') +(1,'Hello') +(2,'Hello') 
+(3,'Hello') +(4,'Hello') +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +OK +OK +tuple Tuple(Nullable(Int64), Nullable(String)) +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +Map +OK +OK +{'a':0,'b':1} +{'a':1,'b':2} +{'a':2,'b':3} +{'a':3,'b':4} +{'a':4,'b':5} +map Map(String, Nullable(Int64)) +{'a':0,'b':1} +{'a':1,'b':2} +{'a':2,'b':3} +{'a':3,'b':4} +{'a':4,'b':5} +Nested types +[[],[0]] ((0,'Hello'),'Hello') {'a':{'a.a':0,'a.b':1},'b':{'b.a':0,'b.b':1}} +[[0],[0,1]] ((1,'Hello'),'Hello') {'a':{'a.a':1,'a.b':2},'b':{'b.a':1,'b.b':2}} +[[0,1],[0,1,2]] ((2,'Hello'),'Hello') {'a':{'a.a':2,'a.b':3},'b':{'b.a':2,'b.b':3}} +[[0,1,2],[0,1,2,3]] ((3,'Hello'),'Hello') {'a':{'a.a':3,'a.b':4},'b':{'b.a':3,'b.b':4}} +[[0,1,2,3],[0,1,2,3,4]] ((4,'Hello'),'Hello') {'a':{'a.a':4,'a.b':5},'b':{'b.a':4,'b.b':5}} +nested1 Array(Array(Nullable(Int64))) +nested2 Tuple(Tuple(x Nullable(Int64), s Nullable(String)), Nullable(String)) +nested3 Map(String, Map(String, Nullable(Int64))) +[[],[0]] ((0,'Hello'),'Hello') {'a':{'a.a':0,'a.b':1},'b':{'b.a':0,'b.b':1}} +[[0],[0,1]] ((1,'Hello'),'Hello') {'a':{'a.a':1,'a.b':2},'b':{'b.a':1,'b.b':2}} +[[0,1],[0,1,2]] ((2,'Hello'),'Hello') {'a':{'a.a':2,'a.b':3},'b':{'b.a':2,'b.b':3}} +[[0,1,2],[0,1,2,3]] ((3,'Hello'),'Hello') {'a':{'a.a':3,'a.b':4},'b':{'b.a':3,'b.b':4}} +[[0,1,2,3],[0,1,2,3,4]] ((4,'Hello'),'Hello') {'a':{'a.a':4,'a.b':5},'b':{'b.a':4,'b.b':5}} +[({'a':[],'b':[0]},[{'c':([],[0])},{'d':([0,1],[0,1,2])}])] +[({'a':[0],'b':[0,1]},[{'c':([0],[0,1])},{'d':([0,1,2],[0,1,2,3])}])] +[({'a':[0,1],'b':[0,1,2]},[{'c':([0,1],[0,1,2])},{'d':([0,1,2,3],[0,1,2,3,4])}])] +[({'a':[0,1,2],'b':[0,1,2,3]},[{'c':([0,1,2],[0,1,2,3])},{'d':([0,1,2,3,4],[0,1,2,3,4,5])}])] +[({'a':[0,1,2,3],'b':[0,1,2,3,4]},[{'c':([0,1,2,3],[0,1,2,3,4])},{'d':([0,1,2,3,4,5],[0,1,2,3,4,5,6])}])] +nested Array(Tuple(Map(String, Array(Nullable(Int64))), Array(Map(String, Array(Array(Nullable(Int64))))))) +[({'a':[],'b':[0]},[{'c':[[],[0]]},{'d':[[0,1],[0,1,2]]}])] +[({'a':[0],'b':[0,1]},[{'c':[[0],[0,1]]},{'d':[[0,1,2],[0,1,2,3]]}])] +[({'a':[0,1],'b':[0,1,2]},[{'c':[[0,1],[0,1,2]]},{'d':[[0,1,2,3],[0,1,2,3,4]]}])] +[({'a':[0,1,2],'b':[0,1,2,3]},[{'c':[[0,1,2],[0,1,2,3]]},{'d':[[0,1,2,3,4],[0,1,2,3,4,5]]}])] +[({'a':[0,1,2,3],'b':[0,1,2,3,4]},[{'c':[[0,1,2,3],[0,1,2,3,4]]},{'d':[[0,1,2,3,4,5],[0,1,2,3,4,5,6]]}])] +Schema inference +x Nullable(Int32) +x Nullable(Int64) +x Nullable(Int64) +FAIL +x Array(Nullable(Int32)) +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +FAIL +OK +OK +OK +OK +Sync after error +OK +0 42 [] +1 42 [0] +2 42 [0,1] +0 42 [] +1 42 [0] +2 42 [0,1] diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh new file mode 100755 index 00000000000..6de33b38183 --- /dev/null +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "Integers" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Bool as bool, number::Int8 as int8, number::UInt8 as uint8, number::Int16 as int16, number::UInt16 as uint16, number::Int32 as int32, number::UInt32 as uint32, number::Int64 as int64, number::UInt64 as uint64 from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'bool Bool, int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + +echo "Integers conversion" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'uint64 UInt64, int64 Int64') select 4294967297, -4294967297 settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 UInt32, int64 UInt32')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 Int32, int64 Int32')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 UInt16, int64 UInt16')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 Int16, int64 Int16')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 UInt8, int64 UInt8')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 Int8, int64 Int8')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Floats" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'float32 Float32, float64 Float64') select number / (number + 1), number / (number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'float32 Float32, float64 Float64')"; + + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Big integers" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256') select number * -10000000000000000000000::Int128 as int128, number * 10000000000000000000000::UInt128 as uint128, number * -100000000000000000000000000000000000000000000::Int256 as int256, number * 100000000000000000000000000000000000000000000::UInt256 as uint256 from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" + + +echo "Dates" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'date Date, date32 Date32, datetime DateTime(\'UTC\'), datetime64 DateTime64(6, \'UTC\')') select number, number, number, number from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'date Date, date32 Date32, datetime DateTime(\'UTC\'), datetime64 DateTime64(6, \'UTC\')')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Decimals" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, 
auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6), decimal128 Decimal128(12), decimal256 Decimal256(24)') select number * 42.422::Decimal32(3) as decimal32, number * 42.424242::Decimal64(6) as decimal64, number * 42.424242424242::Decimal128(12) as decimal128, number * 42.424242424242424242424242::Decimal256(24) as decimal256 from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6), decimal128 Decimal128(12), decimal256 Decimal256(24)')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" + + +echo "Strings" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)') select repeat('HelloWorld', number), repeat(char(97 + number), number % 6) from numbers(5) settings engine_file_truncate_on_insert=1, output_format_bson_string_as_string=0" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)')" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)') select repeat('HelloWorld', number), repeat(char(97 + number), number % 6) from numbers(5) settings engine_file_truncate_on_insert=1, output_format_bson_string_as_string=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)')" + + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "UUID" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'uuid UUID') select 'b86d5c23-4b87-4465-8f33-4a685fa1c868'::UUID settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uuid UUID')" + + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "LowCardinality" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'lc LowCardinality(String)') select char(97 + number % 3)::LowCardinality(String) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'lc LowCardinality(String)')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Nullable" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)') select number % 2 ? NULL : number from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32') settings input_format_null_as_default=0" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "LowCardinality(Nullable)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'lc LowCardinality(Nullable(String))') select number % 2 ? 
NULL : char(97 + number % 3)::LowCardinality(String) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'lc LowCardinality(Nullable(String))')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Array" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'arr1 Array(UInt64), arr2 Array(String)') select range(number), ['Hello'] from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'arr1 Array(UInt64), arr2 Array(String)') settings engine_file_truncate_on_insert=1" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Tuple" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, s String)') select tuple(number, 'Hello') from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, s String)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(s String, x UInt64)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, b String)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64, String)') select tuple(number, 'Hello') from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, s String)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64, String)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64, String, UInt64)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Map" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)') select map(1, number, 2, number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)') select map('a', number, 'b', number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" + +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)')" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo 
"Nested types" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'nested1 Array(Array(UInt32)), nested2 Tuple(Tuple(x UInt32, s String), String), nested3 Map(String, Map(String, UInt32))') select [range(number), range(number + 1)], tuple(tuple(number, 'Hello'), 'Hello'), map('a', map('a.a', number, 'a.b', number + 1), 'b', map('b.a', number, 'b.b', number + 1)) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'nested1 Array(Array(UInt32)), nested2 Tuple(Tuple(x UInt32, s String), String), nested3 Map(String, Map(String, UInt32))')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'nested Array(Tuple(Map(String, Array(UInt32)), Array(Map(String, Tuple(Array(UInt64), Array(UInt64))))))') select [(map('a', range(number), 'b', range(number + 1)), [map('c', (range(number), range(number + 1))), map('d', (range(number + 2), range(number + 3)))])] from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'nested Array(Tuple(Map(String, Array(UInt32)), Array(Map(String, Tuple(Array(UInt64), Array(UInt64))))))')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Schema inference" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Bool as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Int32 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::UInt32 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Int64 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::UInt64 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Bool] as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Int32] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::UInt32] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Int64] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::UInt64] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [] as x from numbers(2) settings engine_file_truncate_on_insert=1" 
+$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "ONLY_NULLS_WHILE_READING_SCHEMA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select NULL as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "ONLY_NULLS_WHILE_READING_SCHEMA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [NULL, 1] as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "ONLY_NULLS_WHILE_READING_SCHEMA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select tuple(1, 'str') as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select tuple(1) as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" + + +echo "Sync after error" +$CLICKHOUSE_CLIENT -q "insert into function file(data.bsonEachRow) select number, 42::Int128 as int, range(number) as arr from numbers(3) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q " insert into function file(data.bsonEachRow) select number, 'Hello' as int, range(number) as arr from numbers(2) settings engine_file_truncate_on_insert=0" +$CLICKHOUSE_CLIENT -q "insert into function file(data.bsonEachRow) select number, 42::Int128 as int, range(number) as arr from numbers(3) settings engine_file_truncate_on_insert=0" +$CLICKHOUSE_CLIENT -q "select * from file(data.bsonEachRow, auto, 'number UInt64, int Int128, arr Array(UInt64)') settings input_format_allow_errors_num=0" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(data.bsonEachRow, auto, 'number UInt64, int Int128, arr Array(UInt64)') settings input_format_allow_errors_num=2" diff --git a/tests/queries/0_stateless/02475_precise_decimal_arithmetics.reference b/tests/queries/0_stateless/02475_precise_decimal_arithmetics.reference new file mode 100644 index 00000000000..6ffc8602640 --- /dev/null +++ b/tests/queries/0_stateless/02475_precise_decimal_arithmetics.reference @@ -0,0 +1,23 @@ +0 +0 +0 +9999999999999999550522436926092261716351992671467843175339166479588690755584 +9999999999999999451597035424131548206707486713696660676795842648250000000000 +11.126038 +10.8 +-11.126038 +-10.8 +10.8 +1376.638914 +1403.6 +-1376.638914 +-1403.6 +1403.6 +332833500 +999 +1000 +1000 +1000 +0.1 +0.1 +0.1 diff --git a/tests/queries/0_stateless/02475_precise_decimal_arithmetics.sql b/tests/queries/0_stateless/02475_precise_decimal_arithmetics.sql new file mode 100644 index 00000000000..3bd7906c7d8 --- /dev/null +++ b/tests/queries/0_stateless/02475_precise_decimal_arithmetics.sql @@ -0,0 +1,45 @@ +-- Tags: no-fasttest + +-- check cases when one of operands is zero +SELECT divideDecimal(toDecimal32(0, 2), toDecimal128(11.123456, 6)); +SELECT divideDecimal(toDecimal64(123.123, 3), toDecimal64(0, 1)); -- { serverError 153 } +SELECT multiplyDecimal(toDecimal32(0, 2), toDecimal128(11.123456, 6)); +SELECT multiplyDecimal(toDecimal32(123.123, 3), toDecimal128(0, 1)); + +-- don't look at strange query result -- it happens due to bad float precision: toUInt256(1e38) == 99999999999999997752612184630461283328 +SELECT multiplyDecimal(toDecimal256(1e38, 0), 
toDecimal256(1e38, 0)); +SELECT divideDecimal(toDecimal256(1e66, 0), toDecimal256(1e-10, 10), 0); + +-- fits Decimal256, but scale is too big to fit +SELECT multiplyDecimal(toDecimal256(1e38, 0), toDecimal256(1e38, 0), 2); -- { serverError 407 } +SELECT divideDecimal(toDecimal256(1e72, 0), toDecimal256(1e-5, 5), 2); -- { serverError 407 } + +-- does not fit Decimal256 +SELECT multiplyDecimal(toDecimal256('1e38', 0), toDecimal256('1e38', 0)); -- { serverError 407 } +SELECT multiplyDecimal(toDecimal256(1e39, 0), toDecimal256(1e39, 0), 0); -- { serverError 407 } +SELECT divideDecimal(toDecimal256(1e39, 0), toDecimal256(1e-38, 39)); -- { serverError 407 } + +-- test different signs +SELECT divideDecimal(toDecimal128(123.76, 2), toDecimal128(11.123456, 6)); +SELECT divideDecimal(toDecimal32(123.123, 3), toDecimal128(11.4, 1), 2); +SELECT divideDecimal(toDecimal128(-123.76, 2), toDecimal128(11.123456, 6)); +SELECT divideDecimal(toDecimal32(123.123, 3), toDecimal128(-11.4, 1), 2); +SELECT divideDecimal(toDecimal32(-123.123, 3), toDecimal128(-11.4, 1), 2); + +SELECT multiplyDecimal(toDecimal64(123.76, 2), toDecimal128(11.123456, 6)); +SELECT multiplyDecimal(toDecimal32(123.123, 3), toDecimal128(11.4, 1), 2); +SELECT multiplyDecimal(toDecimal64(-123.76, 2), toDecimal128(11.123456, 6)); +SELECT multiplyDecimal(toDecimal32(123.123, 3), toDecimal128(-11.4, 1), 2); +SELECT multiplyDecimal(toDecimal32(-123.123, 3), toDecimal128(-11.4, 1), 2); + +-- check against non-const columns +SELECT sum(multiplyDecimal(toDecimal64(number, 1), toDecimal64(number, 5))) FROM numbers(1000); +SELECT sum(divideDecimal(toDecimal64(number, 1), toDecimal64(number, 5))) FROM (select * from numbers(1000) OFFSET 1); + +-- check against Nullable type +SELECT multiplyDecimal(toNullable(toDecimal64(10, 1)), toDecimal64(100, 5)); +SELECT multiplyDecimal(toDecimal64(10, 1), toNullable(toDecimal64(100, 5))); +SELECT multiplyDecimal(toNullable(toDecimal64(10, 1)), toNullable(toDecimal64(100, 5))); +SELECT divideDecimal(toNullable(toDecimal64(10, 1)), toDecimal64(100, 5)); +SELECT divideDecimal(toDecimal64(10, 1), toNullable(toDecimal64(100, 5))); +SELECT divideDecimal(toNullable(toDecimal64(10, 1)), toNullable(toDecimal64(100, 5))); diff --git a/tests/queries/0_stateless/02476_query_parameters_without_serialisation.reference b/tests/queries/0_stateless/02476_query_parameters_without_serialisation.reference new file mode 100644 index 00000000000..3b69cb37de8 --- /dev/null +++ b/tests/queries/0_stateless/02476_query_parameters_without_serialisation.reference @@ -0,0 +1,9 @@ +42 hello 2022-08-04 18:30:53 {'2b95a497-3a5d-49af-bf85-15763318cde7':[1.2,3.4]} +UInt64 String DateTime Map(UUID, Array(Float32)) +42 [1,2,3] {'abc':22,'def':33} [[4,5,6],[7],[8,9]] {10:[11,12],13:[14,15]} {'ghj':{'klm':[16,17]},'nop':{'rst':[18]}} +Int64 Array(UInt8) Map(String, UInt8) Array(Array(UInt8)) Map(UInt8, Array(UInt8)) Map(String, Map(String, Array(UInt8))) +5 +[[['a','b','c'],['d','e','f']],[['g','h','i'],['j','k','l']]] Array(Array(Array(String))) +(((1,'a','2b95a497-3a5d-49af-bf85-15763318cde7',3.14))) Tuple(Tuple(Tuple(Int32, String, UUID, Float32))) +[{1:(2,'2022-08-04 18:30:53','s'),3:(4,'2020-08-04 18:30:53','t')}] Array(Map(UInt64, Tuple(Int16, DateTime, String))) +{'a':[(1,{10:1,20:2}),(2,{30:3,40:4})],'b':[(3,{50:5,60:6}),(4,{70:7,80:8})]} Map(String, Array(Tuple(UInt8, Map(UInt32, Int64)))) diff --git a/tests/queries/0_stateless/02476_query_parameters_without_serialisation.sql 
b/tests/queries/0_stateless/02476_query_parameters_without_serialisation.sql new file mode 100644 index 00000000000..ca62e44c61e --- /dev/null +++ b/tests/queries/0_stateless/02476_query_parameters_without_serialisation.sql @@ -0,0 +1,29 @@ +SET param_num=42; +SET param_str='hello'; +SET param_date='2022-08-04 18:30:53'; +SET param_map={'2b95a497-3a5d-49af-bf85-15763318cde7': [1.2, 3.4]}; +SELECT {num:UInt64}, {str:String}, {date:DateTime}, {map:Map(UUID, Array(Float32))}; +SELECT toTypeName({num:UInt64}), toTypeName({str:String}), toTypeName({date:DateTime}), toTypeName({map:Map(UUID, Array(Float32))}); + +SET param_id=42; +SET param_arr=[1, 2, 3]; +SET param_map_2={'abc': 22, 'def': 33}; +SET param_mul_arr=[[4, 5, 6], [7], [8, 9]]; +SET param_map_arr={10: [11, 12], 13: [14, 15]}; +SET param_map_map_arr={'ghj': {'klm': [16, 17]}, 'nop': {'rst': [18]}}; +SELECT {id: Int64}, {arr: Array(UInt8)}, {map_2: Map(String, UInt8)}, {mul_arr: Array(Array(UInt8))}, {map_arr: Map(UInt8, Array(UInt8))}, {map_map_arr: Map(String, Map(String, Array(UInt8)))}; +SELECT toTypeName({id: Int64}), toTypeName({arr: Array(UInt8)}), toTypeName({map_2: Map(String, UInt8)}), toTypeName({mul_arr: Array(Array(UInt8))}), toTypeName({map_arr: Map(UInt8, Array(UInt8))}), toTypeName({map_map_arr: Map(String, Map(String, Array(UInt8)))}); + +SET param_tbl=numbers; +SET param_db=system; +SET param_col=number; +SELECT {col:Identifier} FROM {db:Identifier}.{tbl:Identifier} LIMIT 1 OFFSET 5; + +SET param_arr_arr_arr=[[['a', 'b', 'c'], ['d', 'e', 'f']], [['g', 'h', 'i'], ['j', 'k', 'l']]]; +SET param_tuple_tuple_tuple=(((1, 'a', '2b95a497-3a5d-49af-bf85-15763318cde7', 3.14))); +SET param_arr_map_tuple=[{1:(2, '2022-08-04 18:30:53', 's'), 3:(4, '2020-08-04 18:30:53', 't')}]; +SET param_map_arr_tuple_map={'a':[(1,{10:1, 20:2}),(2, {30:3, 40:4})], 'b':[(3, {50:5, 60:6}),(4, {70:7, 80:8})]}; +SELECT {arr_arr_arr: Array(Array(Array(String)))}, toTypeName({arr_arr_arr: Array(Array(Array(String)))}); +SELECT {tuple_tuple_tuple: Tuple(Tuple(Tuple(Int32, String, UUID, Float32)))}, toTypeName({tuple_tuple_tuple: Tuple(Tuple(Tuple(Int32, String, UUID, Float32)))}); +SELECT {arr_map_tuple: Array(Map(UInt64, Tuple(Int16, DateTime, String)))}, toTypeName({arr_map_tuple: Array(Map(UInt64, Tuple(Int16, DateTime, String)))}); +SELECT {map_arr_tuple_map: Map(String, Array(Tuple(UInt8, Map(UInt32, Int64))))}, toTypeName({map_arr_tuple_map: Map(String, Array(Tuple(UInt8, Map(UInt32, Int64))))}); diff --git a/tests/queries/0_stateless/02477_exists_fuzz_43478.reference b/tests/queries/0_stateless/02477_exists_fuzz_43478.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02477_exists_fuzz_43478.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02477_exists_fuzz_43478.sql b/tests/queries/0_stateless/02477_exists_fuzz_43478.sql new file mode 100644 index 00000000000..8ec876eb252 --- /dev/null +++ b/tests/queries/0_stateless/02477_exists_fuzz_43478.sql @@ -0,0 +1,3 @@ +create table test_rows_compact_part__fuzz_11 (x UInt32) engine = MergeTree order by x; +insert into test_rows_compact_part__fuzz_11 select 1; +select 1 from test_rows_compact_part__fuzz_11 where exists(select 1) settings allow_experimental_analyzer=1; diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.reference b/tests/queries/0_stateless/02477_s3_request_throttler.reference new file mode 100644 index 00000000000..9315e86b328 --- /dev/null +++ 
b/tests/queries/0_stateless/02477_s3_request_throttler.reference @@ -0,0 +1,2 @@ +1 +1 1 1 diff --git a/tests/queries/0_stateless/02477_s3_request_throttler.sh b/tests/queries/0_stateless/02477_s3_request_throttler.sh new file mode 100755 index 00000000000..c74cb598d42 --- /dev/null +++ b/tests/queries/0_stateless/02477_s3_request_throttler.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: needs s3 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nq " +-- Limit S3 PUT request per second rate +SET s3_max_put_rps = 2; +SET s3_max_put_burst = 1; + +CREATE TEMPORARY TABLE times (t DateTime); + +-- INSERT query requires 3 PUT requests and 1/rps = 0.5 second in between, the first query is not throttled due to burst +INSERT INTO times SELECT now(); +INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/request-throttler.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; +INSERT INTO times SELECT now(); + +SELECT max(t) - min(t) >= 1 FROM times; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['S3CreateMultipartUpload'] == 1, + ProfileEvents['S3UploadPart'] == 1, + ProfileEvents['S3CompleteMultipartUpload'] == 1 +FROM system.query_log +WHERE query LIKE '%request-throttler.csv%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; +" diff --git a/tests/queries/0_stateless/02477_single_value_data_string_regression.reference b/tests/queries/0_stateless/02477_single_value_data_string_regression.reference new file mode 100644 index 00000000000..9285866de08 --- /dev/null +++ b/tests/queries/0_stateless/02477_single_value_data_string_regression.reference @@ -0,0 +1,30 @@ +1 +22.8.5.29 10 +22.8.6.71 10 +1 +22.8.5.29 52 +22.8.6.71 52 +1 +22.8.5.29 0 +22.8.6.71 0 +46_OK 0123456789012345678901234567890123456789012345 +46_KO 0123456789012345678901234567890123456789012345 +47_OK 01234567890123456789012345678901234567890123456 +47_KO 01234567890123456789012345678901234567890123456 +48_OK 012345678901234567890123456789012345678901234567 +48_KO 012345678901234567890123456789012345678901234567 +63_OK 012345678901234567890123456789012345678901234567890123456789012 +63_KO 012345678901234567890123456789012345678901234567890123456789012 +64_OK 0123456789012345678901234567890123456789012345678901234567890123 +64_KO 0123456789012345678901234567890123456789012345678901234567890123 +-1 0 +-2 0 +-2^31 0 +1M without 0 1048576 +1M with 0 1048575 +fuzz2 0123 4 +1 0 +2 \0 1 +3 \0\0\0\0 4 +4 abrac\0dabra\0 12 +abrac\0dabra\0 12 diff --git a/tests/queries/0_stateless/02477_single_value_data_string_regression.sql b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql new file mode 100644 index 00000000000..0f11a06f3fc --- /dev/null +++ b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql @@ -0,0 +1,121 @@ + +-- Context: https://github.com/ClickHouse/ClickHouse/issues/42916 + +-- STRING WITH 10 CHARACTERS +-- SELECT version() AS v, hex(argMaxState('0123456789', number)) AS state FROM numbers(1) FORMAT CSV + +CREATE TABLE argmaxstate_hex_small +( + `v` String, + `state` String +) +ENGINE = TinyLog; + +INSERT into argmaxstate_hex_small VALUES ('22.8.5.29','0B0000003031323334353637383900010000000000000000'), ('22.8.6.71','0A00000030313233343536373839010000000000000000'); + +-- Assert that the 
current version will write the same as 22.8.5 (last known good 22.8 minor) +SELECT + (SELECT hex(argMaxState('0123456789', number)) FROM numbers(1)) = state +FROM argmaxstate_hex_small +WHERE v = '22.8.5.29'; + +-- Assert that the current version can read correctly both the old and the regression states +SELECT + v, + length(finalizeAggregation(CAST(unhex(state) AS AggregateFunction(argMax, String, UInt64)))) +FROM argmaxstate_hex_small; + +-- STRING WITH 54 characters +-- SELECT version() AS v, hex(argMaxState('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', number)) AS state FROM numbers(1) FORMAT CSV +CREATE TABLE argmaxstate_hex_large +( + `v` String, + `state` String +) +ENGINE = TinyLog; + +INSERT into argmaxstate_hex_large VALUES ('22.8.5.29','350000004142434445464748494A4B4C4D4E4F505152535455565758595A6162636465666768696A6B6C6D6E6F707172737475767778797A00010000000000000000'), ('22.8.6.71','340000004142434445464748494A4B4C4D4E4F505152535455565758595A6162636465666768696A6B6C6D6E6F707172737475767778797A010000000000000000'); + +SELECT + (SELECT hex(argMaxState('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', number)) FROM numbers(1)) = state +FROM argmaxstate_hex_large +WHERE v = '22.8.5.29'; + +SELECT + v, + length(finalizeAggregation(CAST(unhex(state) AS AggregateFunction(argMax, String, UInt64)))) +FROM argmaxstate_hex_large; + +-- STRING WITH 0 characters +-- SELECT version() AS v, hex(argMaxState('', number)) AS state FROM numbers(1) FORMAT CSV +CREATE TABLE argmaxstate_hex_empty +( + `v` String, + `state` String +) +ENGINE = TinyLog; + +INSERT into argmaxstate_hex_empty VALUES ('22.8.5.29','0100000000010000000000000000'), ('22.8.6.71','00000000010000000000000000'); + +SELECT + (SELECT hex(argMaxState('', number)) FROM numbers(1)) = state +FROM argmaxstate_hex_empty +WHERE v = '22.8.5.29'; + +SELECT v, length(finalizeAggregation(CAST(unhex(state) AS AggregateFunction(argMax, String, UInt64)))) +FROM argmaxstate_hex_empty; + +-- Right in the border of small and large buffers +-- SELECT hex(argMaxState('0123456789012345678901234567890123456789012345' as a, number)) AS state, length(a) FROM numbers(1) FORMAT CSV +SELECT '46_OK', finalizeAggregation(CAST(unhex('2F0000003031323334353637383930313233343536373839303132333435363738393031323334353637383930313233343500010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); +SELECT '46_KO', finalizeAggregation(CAST(unhex('2E00000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); + +-- SELECT hex(argMaxState('01234567890123456789012345678901234567890123456' as a, number)) AS state, length(a) FROM numbers(1) FORMAT CSV +SELECT '47_OK', finalizeAggregation(CAST(unhex('30000000303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353600010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); +SELECT '47_KO', finalizeAggregation(CAST(unhex('2F0000003031323334353637383930313233343536373839303132333435363738393031323334353637383930313233343536010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); + +-- SELECT hex(argMaxState('012345678901234567890123456789012345678901234567' as a, number)) AS state, length(a) FROM numbers(1) FORMAT CSV +SELECT '48_OK', finalizeAggregation(CAST(unhex('3100000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363700010000000000000000'), 'AggregateFunction(argMax, String, 
UInt64)')); +SELECT '48_KO', finalizeAggregation(CAST(unhex('30000000303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); + +-- Right at the allocation limit (power of 2) +-- SELECT hex(argMaxState('012345678901234567890123456789012345678901234567890123456789012' as a, number)) AS state, length(a) FROM numbers(1) FORMAT CSV +SELECT '63_OK', finalizeAggregation(CAST(unhex('4000000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313200010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); +SELECT '63_KO', finalizeAggregation(CAST(unhex('3F000000303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233343536373839303132010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); +-- SELECT hex(argMaxState('0123456789012345678901234567890123456789012345678901234567890123' as a, number)) AS state, length(a) FROM numbers(1) FORMAT CSV +SELECT '64_OK', finalizeAggregation(CAST(unhex('410000003031323334353637383930313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323300010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); +SELECT '64_KO', finalizeAggregation(CAST(unhex('4000000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); + +SELECT '-1', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('ffffffff') || randomString(100500), 'AggregateFunction(max, String)') as x); +SELECT '-2', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('fffffffe') || randomString(100500), 'AggregateFunction(max, String)') as x); +SELECT '-2^31', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('00000080') || randomString(100500), 'AggregateFunction(max, String)') as x); + +SELECT '2^31-1', maxMerge(x) from (select CAST(unhex('ffffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE } + +SELECT '2^31-2', maxMerge(x) from (select CAST(unhex('feffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } + +SELECT '2^30', maxMerge(x) from (select CAST(unhex('00000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } +SELECT '2^30+1', maxMerge(x) from (select CAST(unhex('01000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } + +SELECT '2^30-1', maxMerge(x) from (select CAST(unhex('ffffff3f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } +-- The following query works, but it's too long and consumes too much memory +-- SELECT '2^30-1', length(maxMerge(x)) from (select CAST(unhex('ffffff3f') || randomString(0x3FFFFFFF - 1) || 'x', 'AggregateFunction(max, String)') as x); +SELECT '1M without 0', length(maxMerge(x)) from (select CAST(unhex('00001000') || randomString(0x00100000 - 1) || 'x', 'AggregateFunction(max, String)') as x); +SELECT '1M with 0', length(maxMerge(x)) from (select CAST(unhex('00001000') || randomString(0x00100000 - 1) || '\0', 'AggregateFunction(max, String)') as x); + +SELECT 'fuzz1', 
finalizeAggregation(CAST(unhex('3000000\0303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353600010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); -- { serverError CORRUPTED_DATA } +SELECT 'fuzz2', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '01' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); +SELECT 'fuzz3', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } +SELECT 'fuzz4', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } +SELECT 'fuzz5', finalizeAggregation(CAST(unhex('0100000000000000000FFFFFFFF0'), 'AggregateFunction(argMax, UInt64, String)')); -- { serverError CORRUPTED_DATA } + + +drop table if exists aggr; +create table aggr (n int, s AggregateFunction(max, String)) engine=MergeTree order by n; +insert into aggr select 1, maxState(''); +insert into aggr select 2, maxState('\0'); +insert into aggr select 3, maxState('\0\0\0\0'); +insert into aggr select 4, maxState('abrac\0dabra\0'); +select n, maxMerge(s) as x, length(x) from aggr group by n order by n; +select maxMerge(s) as x, length(x) from aggr; +drop table aggr; diff --git a/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.reference b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.reference new file mode 100644 index 00000000000..7c5d87e1389 --- /dev/null +++ b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.reference @@ -0,0 +1,8 @@ +0 +0 + +0 +((0.0001)) 0 +((0.0001)) 0 + +((0.0001)) 0 diff --git a/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.sql b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.sql new file mode 100644 index 00000000000..6cd3e6a9385 --- /dev/null +++ b/tests/queries/0_stateless/02479_analyzer_aggregation_totals_rollup_crash_fix.sql @@ -0,0 +1,5 @@ +SET allow_experimental_analyzer = 1; + +SELECT anyLast(number) FROM numbers(1) GROUP BY number WITH ROLLUP WITH TOTALS; + +SELECT tuple(tuple(0.0001)), anyLast(number) FROM numbers(1) GROUP BY number WITH ROLLUP WITH TOTALS; diff --git a/tests/queries/0_stateless/02480_analyzer_alias_nullptr.reference b/tests/queries/0_stateless/02480_analyzer_alias_nullptr.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02480_analyzer_alias_nullptr.sql b/tests/queries/0_stateless/02480_analyzer_alias_nullptr.sql new file mode 100644 index 00000000000..f6b381e5c70 --- /dev/null +++ b/tests/queries/0_stateless/02480_analyzer_alias_nullptr.sql @@ -0,0 +1,3 @@ +SET allow_experimental_analyzer = 1; + +SELECT min(b), x AS b FROM (SELECT max(number) FROM numbers(1)); -- { serverError UNKNOWN_IDENTIFIER } diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.reference b/tests/queries/0_stateless/02480_max_map_null_totals.reference new file mode 100644 index 00000000000..5cc9b5a495f --- /dev/null +++ b/tests/queries/0_stateless/02480_max_map_null_totals.reference @@ -0,0 +1,119 @@ +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) 
+([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([0],[0]) +([2],[2]) +([1],[1]) + +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +- +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([0],[0]) +([2],[2]) +([1],[1]) + +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +- +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) + +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([-1,0],[0,0]) +([1,2],[0,2]) +([0,1],[0,1]) +([-1,0,1,2],[0,0,0,2]) +([0],[0]) +([2],[2]) +([1],[1]) + +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) +([0],[0]) +([2],[2]) +([1],[1]) +([0,2],[0,2]) diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.sql b/tests/queries/0_stateless/02480_max_map_null_totals.sql new file mode 100644 index 00000000000..81e2a5c4243 --- /dev/null +++ b/tests/queries/0_stateless/02480_max_map_null_totals.sql @@ -0,0 +1,39 @@ +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT '-'; + +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT 
sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT '-'; + +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; + +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; diff --git a/tests/queries/0_stateless/02480_s3_support_wildcard.reference b/tests/queries/0_stateless/02480_s3_support_wildcard.reference new file mode 100644 index 00000000000..c6b63f647f8 --- /dev/null +++ b/tests/queries/0_stateless/02480_s3_support_wildcard.reference @@ -0,0 +1,43 @@ +-- { echo } +drop table if exists test_02480_write; +drop table if exists test_02480_write2; +create table test_02480_write (a UInt64, b String) engine = S3(s3_conn, filename='test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write values (1, 'a'), (22, 'b'), (333, 'c'); +select a, b from s3(s3_conn, filename='test_02480_*', format=Parquet) order by a; +1 a +22 b +333 c +select a, b from s3(s3_conn, filename='test_02480_?', format=Parquet) order by a; +1 a +select a, b from s3(s3_conn, filename='test_02480_??', format=Parquet) order by a; +22 b +select a, b from s3(s3_conn, filename='test_02480_?*?', format=Parquet) order by a; +22 b +333 c +select a, b from s3(s3_conn, filename='test_02480_{1,333}', format=Parquet) order by a; +1 a +333 c +select a, b from s3(s3_conn, filename='test_02480_{1..333}', format=Parquet) order by a; +1 a +22 b +333 c +create table test_02480_write2 (a UInt64, b String) engine = S3(s3_conn, filename='prefix/test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write2 values (4, 'd'), (55, 'f'), (666, 'g'); +select a, b from s3(s3_conn, filename='*/test_02480_*', format=Parquet) order by a; +4 d +55 f +666 g +select a, b from s3(s3_conn, filename='*/test_02480_?', format=Parquet) order by a; +4 d +select a, b from s3(s3_conn, filename='prefix/test_02480_??', format=Parquet) order by a; +55 f +select a, b from s3(s3_conn, filename='prefi?/test_02480_*', format=Parquet) order by a; +4 d +55 f +666 g +select a, b from s3(s3_conn, filename='p?*/test_02480_{56..666}', format=Parquet) order by a; +666 g +drop table test_02480_write; +drop table test_02480_write2; diff --git a/tests/queries/0_stateless/02480_s3_support_wildcard.sql b/tests/queries/0_stateless/02480_s3_support_wildcard.sql new file mode 100644 index 00000000000..9da5a022dc4 --- /dev/null +++ 
b/tests/queries/0_stateless/02480_s3_support_wildcard.sql @@ -0,0 +1,29 @@ +-- Tags: no-parallel, no-fasttest +-- Tag no-fasttest: Depends on AWS + +-- { echo } +drop table if exists test_02480_write; +drop table if exists test_02480_write2; +create table test_02480_write (a UInt64, b String) engine = S3(s3_conn, filename='test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write values (1, 'a'), (22, 'b'), (333, 'c'); + +select a, b from s3(s3_conn, filename='test_02480_*', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_?', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_??', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_?*?', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_{1,333}', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_{1..333}', format=Parquet) order by a; + +create table test_02480_write2 (a UInt64, b String) engine = S3(s3_conn, filename='prefix/test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write2 values (4, 'd'), (55, 'f'), (666, 'g'); + +select a, b from s3(s3_conn, filename='*/test_02480_*', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='*/test_02480_?', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='prefix/test_02480_??', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='prefi?/test_02480_*', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='p?*/test_02480_{56..666}', format=Parquet) order by a; + +drop table test_02480_write; +drop table test_02480_write2; diff --git a/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.reference b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.reference new file mode 100644 index 00000000000..0a9ed2fb4c8 --- /dev/null +++ b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.reference @@ -0,0 +1 @@ +String Value_1 diff --git a/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.sql b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.sql new file mode 100644 index 00000000000..b0983159eaf --- /dev/null +++ b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.sql @@ -0,0 +1,36 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt8, + value String +) +ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Value_0'); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt16, + value String +) +ENGINE = TinyLog; + +INSERT INTO test_table_join_2 VALUES (0, 'Value_1'); + +SELECT + toTypeName(t2_value), + t2.value AS t2_value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 USING (id); -- { serverError 47 }; + +SELECT + toTypeName(t2_value), + t2.value AS t2_value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 USING (id); + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.reference b/tests/queries/0_stateless/02481_async_insert_race_long.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02481_async_insert_race_long.reference @@ -0,0 +1 @@ +OK diff 
--git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh new file mode 100755 index 00000000000..cec9278c127 --- /dev/null +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-fasttest, long + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export MY_CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --async_insert_busy_timeout_ms 10 --async_insert_max_data_size 1 --async_insert 1" + +function insert1() +{ + while true; do + ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT CSV 1,"a"' + done +} + +function insert2() +{ + while true; do + ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' + done +} + +function insert3() +{ + while true; do + ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & + sleep 0.05 + done +} + +function select1() +{ + while true; do + ${MY_CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts_race FORMAT Null" + done + +} + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts_race" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts_race (id UInt32, s String) ENGINE = MergeTree ORDER BY id" + +TIMEOUT=10 + +export -f insert1 +export -f insert2 +export -f insert3 +export -f select1 + +for _ in {1..3}; do + timeout $TIMEOUT bash -c insert1 & + timeout $TIMEOUT bash -c insert2 & + timeout $TIMEOUT bash -c insert3 & +done + +timeout $TIMEOUT bash -c select1 & + +wait +echo "OK" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts_race"; diff --git a/tests/queries/0_stateless/02481_default_value_used_in_row_level_filter.reference b/tests/queries/0_stateless/02481_default_value_used_in_row_level_filter.reference new file mode 100644 index 00000000000..c8e17be819a --- /dev/null +++ b/tests/queries/0_stateless/02481_default_value_used_in_row_level_filter.reference @@ -0,0 +1,16 @@ +-- { echoOn } + +SELECT a, c FROM test_rlp WHERE c%2 == 0 AND b < 5; +0 10 +2 12 +4 14 +DROP POLICY IF EXISTS test_rlp_policy ON test_rlp; +CREATE ROW POLICY test_rlp_policy ON test_rlp FOR SELECT USING c%2 == 0 TO default; +SELECT a, c FROM test_rlp WHERE b < 5 SETTINGS optimize_move_to_prewhere = 0; +0 10 +2 12 +4 14 +SELECT a, c FROM test_rlp PREWHERE b < 5; +0 10 +2 12 +4 14 diff --git a/tests/queries/0_stateless/02481_default_value_used_in_row_level_filter.sql b/tests/queries/0_stateless/02481_default_value_used_in_row_level_filter.sql new file mode 100644 index 00000000000..6835a3a57ea --- /dev/null +++ b/tests/queries/0_stateless/02481_default_value_used_in_row_level_filter.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS test_rlp; + +CREATE TABLE test_rlp (a Int32, b Int32) ENGINE=MergeTree() ORDER BY a SETTINGS index_granularity=5; + +INSERT INTO test_rlp SELECT number, number FROM numbers(15); + +ALTER TABLE test_rlp ADD COLUMN c Int32 DEFAULT b+10; + +-- { echoOn } + +SELECT a, c FROM test_rlp WHERE c%2 == 0 AND b < 5; + +DROP POLICY IF EXISTS test_rlp_policy ON test_rlp; + +CREATE ROW POLICY test_rlp_policy ON test_rlp FOR SELECT USING c%2 == 0 TO default; + +SELECT a, c FROM test_rlp WHERE b < 5 SETTINGS optimize_move_to_prewhere = 0; + +SELECT a, c FROM test_rlp PREWHERE b < 5; + +-- { echoOff } + +DROP POLICY test_rlp_policy ON test_rlp; + +DROP TABLE 
test_rlp; diff --git a/tests/queries/0_stateless/02481_fix_parameters_parsing.reference b/tests/queries/0_stateless/02481_fix_parameters_parsing.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02481_fix_parameters_parsing.sql b/tests/queries/0_stateless/02481_fix_parameters_parsing.sql new file mode 100644 index 00000000000..6164ec77774 --- /dev/null +++ b/tests/queries/0_stateless/02481_fix_parameters_parsing.sql @@ -0,0 +1,2 @@ +SELECT func(1)(2)(3); -- { clientError SYNTAX_ERROR } +SELECT * FROM VALUES(1)(2); -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference new file mode 100644 index 00000000000..3e3abfb9a41 --- /dev/null +++ b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.reference @@ -0,0 +1 @@ +0123456789012345678901234567890123456789012345678901234567890123 diff --git a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql new file mode 100644 index 00000000000..c893e49fed3 --- /dev/null +++ b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/43247 +SELECT finalizeAggregation(CAST('AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)', + 'AggregateFunction(min, String)')); -- { serverError CANNOT_READ_ALL_DATA } + +-- Value from hex(minState('0123456789012345678901234567890123456789012345678901234567890123')). Size 63 + 1 (64) +SELECT finalizeAggregation(CAST(unhex('4000000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233'), + 'AggregateFunction(min, String)')); diff --git a/tests/queries/0_stateless/02481_merge_array_join_sample_by.reference b/tests/queries/0_stateless/02481_merge_array_join_sample_by.reference new file mode 100644 index 00000000000..a6635b9ba11 --- /dev/null +++ b/tests/queries/0_stateless/02481_merge_array_join_sample_by.reference @@ -0,0 +1,2 @@ +199998 +199998 diff --git a/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql b/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql new file mode 100644 index 00000000000..39fc751f331 --- /dev/null +++ b/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS 02481_mergetree; +DROP TABLE IF EXISTS 02481_merge; + +CREATE TABLE 02481_mergetree(x UInt64, y UInt64, arr Array(String)) ENGINE = MergeTree ORDER BY x SAMPLE BY x; + +CREATE TABLE 02481_merge(x UInt64, y UInt64, arr Array(String)) ENGINE = Merge(currentDatabase(), '^(02481_mergetree)$'); + +INSERT INTO 02481_mergetree SELECT number, number + 1, [1,2] FROM system.numbers LIMIT 100000; + +SELECT count() FROM 02481_mergetree SAMPLE 1 / 2 ARRAY JOIN arr WHERE x != 0; +SELECT count() FROM 02481_merge SAMPLE 1 / 2 ARRAY JOIN arr WHERE x != 0; + +DROP TABLE 02481_mergetree; +DROP TABLE 02481_merge; diff --git a/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.reference b/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.reference new file mode 100644 index 00000000000..285856e363a --- /dev/null +++ b/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.reference @@ -0,0 +1,3 @@ +Parquet +3d94071a2fe62a3b3285f170ca6f42e5 - +70000 diff --git 
a/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.sh b/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.sh new file mode 100755 index 00000000000..c2c6f689851 --- /dev/null +++ b/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "Parquet" + +# File generated with the below script + +#import pyarrow as pa +#import pyarrow.parquet as pq +#import random +# +# +#def gen_array(offset): +# array = [] +# array_length = random.randint(0, 9) +# for i in range(array_length): +# array.append(i + offset) +# +# return array +# +# +#def gen_arrays(number_of_arrays): +# list_of_arrays = [] +# for i in range(number_of_arrays): +# list_of_arrays.append(gen_array(i)) +# return list_of_arrays +# +#arr = pa.array(gen_arrays(70000)) +#table = pa.table([arr], ["arr"]) +#pq.write_table(table, "int-list-zero-based-chunked-array.parquet") + +DATA_FILE=$CUR_DIR/data_parquet/int-list-zero-based-chunked-array.parquet +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (arr Array(Int64)) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" \ No newline at end of file diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.reference b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.reference new file mode 100644 index 00000000000..2db066c0f87 --- /dev/null +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.reference @@ -0,0 +1,3 @@ +Parquet +e1cfe4265689ead763b18489b363344d - +39352 diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh new file mode 100755 index 00000000000..47245eeb940 --- /dev/null +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "Parquet" + +DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" \ No newline at end of file diff --git a/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.reference b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.reference new file mode 100644 index 00000000000..b6a7d89c68e --- /dev/null +++ b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.reference @@ -0,0 +1 @@ +16 diff --git a/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql new file mode 100644 index 00000000000..91402bbed60 --- /dev/null +++ b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql @@ -0,0 +1,23 @@ +CREATE TABLE gen +( + repo_name String, + event_type Enum8('CommitCommentEvent' = 1, 'CreateEvent' = 2, 'DeleteEvent' = 3, 'ForkEvent' = 4, 'GollumEvent' = 5, 'IssueCommentEvent' = 6, 'IssuesEvent' = 7, 'MemberEvent' = 8, 'PublicEvent' = 9, 'PullRequestEvent' = 10, 'PullRequestReviewCommentEvent' = 11, 'PushEvent' = 12, 'ReleaseEvent' = 13, 'SponsorshipEvent' = 14, 'WatchEvent' = 15, 'GistEvent' = 16, 'FollowEvent' = 17, 'DownloadEvent' = 18, 'PullRequestReviewEvent' = 19, 'ForkApplyEvent' = 20, 'Event' = 21, 'TeamAddEvent' = 22), + actor_login String, + created_at DateTime, + action Enum8('none' = 0, 'created' = 1, 'added' = 2, 'edited' = 3, 'deleted' = 4, 'opened' = 5, 'closed' = 6, 'reopened' = 7, 'assigned' = 8, 'unassigned' = 9, 'labeled' = 10, 'unlabeled' = 11, 'review_requested' = 12, 'review_request_removed' = 13, 'synchronize' = 14, 'started' = 15, 'published' = 16, 'update' = 17, 'create' = 18, 'fork' = 19, 'merged' = 20), + number UInt32, + merged_at DateTime +) +ENGINE = GenerateRandom; + +CREATE TABLE github_events AS gen ENGINE=MergeTree ORDER BY (event_type, repo_name, created_at); + +INSERT INTO github_events SELECT * FROM gen LIMIT 100000; + +INSERT INTO github_events VALUES ('apache/pulsar','PullRequestEvent','hangc0276','2021-01-22 06:58:03','opened',9276,'1970-01-01 00:00:00') ('apache/pulsar','PullRequestEvent','hangc0276','2021-01-25 02:38:07','closed',9276,'1970-01-01 00:00:00') ('apache/pulsar','PullRequestEvent','hangc0276','2021-01-25 02:38:09','reopened',9276,'1970-01-01 00:00:00') ('apache/pulsar','PullRequestEvent','hangc0276','2021-04-22 06:05:09','closed',9276,'2021-04-22 06:05:08') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-01-23 00:32:09','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-01-23 02:52:11','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-01-24 03:02:31','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-01-25 02:16:42','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-01-26 06:52:42','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-01-27 01:10:33','created',9276,'1970-01-01 00:00:00') 
('apache/pulsar','IssueCommentEvent','hangc0276','2021-01-29 02:11:41','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-02-02 07:35:40','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-02-03 00:44:26','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','IssueCommentEvent','hangc0276','2021-02-03 02:14:26','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','PullRequestReviewEvent','codelipenghui','2021-03-29 14:31:25','created',9276,'1970-01-01 00:00:00') ('apache/pulsar','PullRequestReviewEvent','eolivelli','2021-03-29 16:34:02','created',9276,'1970-01-01 00:00:00'); + +OPTIMIZE TABLE github_events FINAL; + +SELECT count() +FROM github_events +WHERE (repo_name = 'apache/pulsar') AND (toString(event_type) IN ('PullRequestEvent', 'PullRequestReviewCommentEvent', 'PullRequestReviewEvent', 'IssueCommentEvent')) AND (actor_login NOT IN ('github-actions[bot]', 'codecov-commenter')) AND (number = 9276); diff --git a/tests/queries/0_stateless/02481_xxh3_hash_function.reference b/tests/queries/0_stateless/02481_xxh3_hash_function.reference new file mode 100644 index 00000000000..73276fe135e --- /dev/null +++ b/tests/queries/0_stateless/02481_xxh3_hash_function.reference @@ -0,0 +1 @@ +18009318874338624809 diff --git a/tests/queries/0_stateless/02481_xxh3_hash_function.sql b/tests/queries/0_stateless/02481_xxh3_hash_function.sql new file mode 100644 index 00000000000..cd87f08a68e --- /dev/null +++ b/tests/queries/0_stateless/02481_xxh3_hash_function.sql @@ -0,0 +1 @@ +SELECT xxh3('ClickHouse'); diff --git a/tests/queries/0_stateless/02482_if_with_nothing_argument.reference b/tests/queries/0_stateless/02482_if_with_nothing_argument.reference new file mode 100644 index 00000000000..484c9fb68b5 --- /dev/null +++ b/tests/queries/0_stateless/02482_if_with_nothing_argument.reference @@ -0,0 +1,2 @@ +[] 0 +[] 0 diff --git a/tests/queries/0_stateless/02482_if_with_nothing_argument.sql b/tests/queries/0_stateless/02482_if_with_nothing_argument.sql new file mode 100644 index 00000000000..af46ef30d02 --- /dev/null +++ b/tests/queries/0_stateless/02482_if_with_nothing_argument.sql @@ -0,0 +1,3 @@ +select [] as arr, if(empty(arr), 0, arr[-1]); +select [] as arr, multiIf(empty(arr), 0, length(arr) > 1, arr[-1], 0); + diff --git a/tests/queries/0_stateless/02482_insert_into_dist_race.reference b/tests/queries/0_stateless/02482_insert_into_dist_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02482_insert_into_dist_race.sql b/tests/queries/0_stateless/02482_insert_into_dist_race.sql new file mode 100644 index 00000000000..ee412a87d33 --- /dev/null +++ b/tests/queries/0_stateless/02482_insert_into_dist_race.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS tmp_02482; +DROP TABLE IF EXISTS dist_02482; + +-- This test produces warning +SET send_logs_level = 'error'; +SET prefer_localhost_replica=0; + +CREATE TABLE tmp_02482 (i UInt64, n LowCardinality(String)) ENGINE = Memory; +CREATE TABLE dist_02482(i UInt64, n LowCardinality(Nullable(String))) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), tmp_02482, i); + +SET insert_distributed_sync=1; + +INSERT INTO dist_02482 VALUES (1, '1'), (2, '2'); +INSERT INTO dist_02482 SELECT number, number FROM numbers(1000); + +SET insert_distributed_sync=0; + +SYSTEM STOP DISTRIBUTED SENDS dist_02482; + +INSERT INTO dist_02482 VALUES (1, '1'),(2, '2'); +INSERT INTO dist_02482 SELECT number, number FROM numbers(1000); + 
+SYSTEM FLUSH DISTRIBUTED dist_02482; + +DROP TABLE tmp_02482; +DROP TABLE dist_02482; diff --git a/tests/queries/0_stateless/02483_substitute_udf_create.reference b/tests/queries/0_stateless/02483_substitute_udf_create.reference new file mode 100644 index 00000000000..ea07b63e068 --- /dev/null +++ b/tests/queries/0_stateless/02483_substitute_udf_create.reference @@ -0,0 +1,33 @@ +-- { echo } +CREATE FUNCTION 02483_plusone AS (a) -> a + 1; +CREATE TABLE 02483_substitute_udf (id UInt32, number UInt32 DEFAULT 02483_plusone(id)) ENGINE=MergeTree() ORDER BY id; +DESC TABLE 02483_substitute_udf; +id UInt32 +number UInt32 DEFAULT id + 1 +INSERT INTO 02483_substitute_udf (id, number) VALUES (1, NULL); +SELECT * FROM 02483_substitute_udf ORDER BY id; +1 2 +CREATE FUNCTION 02483_plustwo AS (a) -> a + 2; +ALTER TABLE 02483_substitute_udf MODIFY COLUMN number UInt32 DEFAULT 02483_plustwo(id); +DESC TABLE 02483_substitute_udf; +id UInt32 +number UInt32 DEFAULT id + 2 +INSERT INTO 02483_substitute_udf (id, number) VALUES (5, NULL); +SELECT * FROM 02483_substitute_udf ORDER BY id; +1 2 +5 7 +CREATE FUNCTION 02483_plusthree AS (a) -> a + 3; +ALTER TABLE 02483_substitute_udf DROP COLUMN number; +ALTER TABLE 02483_substitute_udf ADD COLUMN new_number UInt32 DEFAULT 02483_plusthree(id); +DESC TABLE 02483_substitute_udf; +id UInt32 +new_number UInt32 DEFAULT id + 3 +INSERT INTO 02483_substitute_udf (id, new_number) VALUES (10, NULL); +SELECT * FROM 02483_substitute_udf ORDER BY id; +1 4 +5 8 +10 13 +DROP TABLE 02483_substitute_udf; +DROP FUNCTION 02483_plusone; +DROP FUNCTION 02483_plustwo; +DROP FUNCTION 02483_plusthree; diff --git a/tests/queries/0_stateless/02483_substitute_udf_create.sql b/tests/queries/0_stateless/02483_substitute_udf_create.sql new file mode 100644 index 00000000000..9cfb198cf4c --- /dev/null +++ b/tests/queries/0_stateless/02483_substitute_udf_create.sql @@ -0,0 +1,31 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS 02483_substitute_udf; +DROP FUNCTION IF EXISTS 02483_plusone; +DROP FUNCTION IF EXISTS 02483_plustwo; +DROP FUNCTION IF EXISTS 02483_plusthree; + +-- { echo } +CREATE FUNCTION 02483_plusone AS (a) -> a + 1; +CREATE TABLE 02483_substitute_udf (id UInt32, number UInt32 DEFAULT 02483_plusone(id)) ENGINE=MergeTree() ORDER BY id; +DESC TABLE 02483_substitute_udf; +INSERT INTO 02483_substitute_udf (id, number) VALUES (1, NULL); +SELECT * FROM 02483_substitute_udf ORDER BY id; + +CREATE FUNCTION 02483_plustwo AS (a) -> a + 2; +ALTER TABLE 02483_substitute_udf MODIFY COLUMN number UInt32 DEFAULT 02483_plustwo(id); +DESC TABLE 02483_substitute_udf; +INSERT INTO 02483_substitute_udf (id, number) VALUES (5, NULL); +SELECT * FROM 02483_substitute_udf ORDER BY id; + +CREATE FUNCTION 02483_plusthree AS (a) -> a + 3; +ALTER TABLE 02483_substitute_udf DROP COLUMN number; +ALTER TABLE 02483_substitute_udf ADD COLUMN new_number UInt32 DEFAULT 02483_plusthree(id); +DESC TABLE 02483_substitute_udf; +INSERT INTO 02483_substitute_udf (id, new_number) VALUES (10, NULL); +SELECT * FROM 02483_substitute_udf ORDER BY id; + +DROP TABLE 02483_substitute_udf; +DROP FUNCTION 02483_plusone; +DROP FUNCTION 02483_plustwo; +DROP FUNCTION 02483_plusthree; diff --git a/tests/queries/0_stateless/02484_substitute_udf_storage_args.reference b/tests/queries/0_stateless/02484_substitute_udf_storage_args.reference new file mode 100644 index 00000000000..6a799b1e013 --- /dev/null +++ b/tests/queries/0_stateless/02484_substitute_udf_storage_args.reference @@ -0,0 +1,23 @@ +-- { echo } +CREATE TABLE 
02484_substitute_udf (id UInt32, dt DateTime, number UInt32) +ENGINE=MergeTree() +ORDER BY 02484_plusone(id) +PARTITION BY 02484_plustwo(id) +SAMPLE BY 02484_plusone(id) +TTL 02484_plusthreemonths(dt); +SHOW CREATE TABLE 02484_substitute_udf; +CREATE TABLE default.`02484_substitute_udf`\n(\n `id` UInt32,\n `dt` DateTime,\n `number` UInt32\n)\nENGINE = MergeTree\nPARTITION BY id + 2\nORDER BY id + 1\nSAMPLE BY id + 1\nTTL dt + toIntervalMonth(3)\nSETTINGS index_granularity = 8192 +CREATE FUNCTION 02484_plusthree AS (a) -> a + 3; +ALTER TABLE 02484_substitute_udf ADD COLUMN id2 UInt64, MODIFY ORDER BY (02484_plusone(id), 02484_plusthree(id2)); +SHOW CREATE TABLE 02484_substitute_udf; +CREATE TABLE default.`02484_substitute_udf`\n(\n `id` UInt32,\n `dt` DateTime,\n `number` UInt32,\n `id2` UInt64\n)\nENGINE = MergeTree\nPARTITION BY id + 2\nPRIMARY KEY id + 1\nORDER BY (id + 1, id2 + 3)\nSAMPLE BY id + 1\nTTL dt + toIntervalMonth(3)\nSETTINGS index_granularity = 8192 +CREATE FUNCTION 02484_plusthreedays AS (a) -> a + INTERVAL 3 DAY; +ALTER TABLE 02484_substitute_udf MODIFY TTL 02484_plusthreedays(dt); +SHOW CREATE TABLE 02484_substitute_udf; +CREATE TABLE default.`02484_substitute_udf`\n(\n `id` UInt32,\n `dt` DateTime,\n `number` UInt32,\n `id2` UInt64\n)\nENGINE = MergeTree\nPARTITION BY id + 2\nPRIMARY KEY id + 1\nORDER BY (id + 1, id2 + 3)\nSAMPLE BY id + 1\nTTL dt + toIntervalDay(3)\nSETTINGS index_granularity = 8192 +DROP TABLE 02484_substitute_udf; +DROP FUNCTION 02484_plusone; +DROP FUNCTION 02484_plustwo; +DROP FUNCTION 02484_plusthree; +DROP FUNCTION 02484_plusthreemonths; +DROP FUNCTION 02484_plusthreedays; diff --git a/tests/queries/0_stateless/02484_substitute_udf_storage_args.sql b/tests/queries/0_stateless/02484_substitute_udf_storage_args.sql new file mode 100644 index 00000000000..a39c6009d58 --- /dev/null +++ b/tests/queries/0_stateless/02484_substitute_udf_storage_args.sql @@ -0,0 +1,37 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS 02484_substitute_udf; +DROP FUNCTION IF EXISTS 02484_plusone; +DROP FUNCTION IF EXISTS 02484_plustwo; +DROP FUNCTION IF EXISTS 02484_plusthree; +DROP FUNCTION IF EXISTS 02484_plusthreemonths; +DROP FUNCTION IF EXISTS 02484_plusthreedays; + +CREATE FUNCTION 02484_plusone AS (a) -> a + 1; +CREATE FUNCTION 02484_plustwo AS (a) -> a + 2; +CREATE FUNCTION 02484_plusthreemonths AS (a) -> a + INTERVAL 3 MONTH; + +-- { echo } +CREATE TABLE 02484_substitute_udf (id UInt32, dt DateTime, number UInt32) +ENGINE=MergeTree() +ORDER BY 02484_plusone(id) +PARTITION BY 02484_plustwo(id) +SAMPLE BY 02484_plusone(id) +TTL 02484_plusthreemonths(dt); + +SHOW CREATE TABLE 02484_substitute_udf; + +CREATE FUNCTION 02484_plusthree AS (a) -> a + 3; +ALTER TABLE 02484_substitute_udf ADD COLUMN id2 UInt64, MODIFY ORDER BY (02484_plusone(id), 02484_plusthree(id2)); +SHOW CREATE TABLE 02484_substitute_udf; + +CREATE FUNCTION 02484_plusthreedays AS (a) -> a + INTERVAL 3 DAY; +ALTER TABLE 02484_substitute_udf MODIFY TTL 02484_plusthreedays(dt); +SHOW CREATE TABLE 02484_substitute_udf; + +DROP TABLE 02484_substitute_udf; +DROP FUNCTION 02484_plusone; +DROP FUNCTION 02484_plustwo; +DROP FUNCTION 02484_plusthree; +DROP FUNCTION 02484_plusthreemonths; +DROP FUNCTION 02484_plusthreedays; diff --git a/tests/queries/0_stateless/02491_part_log_has_table_uuid.reference b/tests/queries/0_stateless/02491_part_log_has_table_uuid.reference new file mode 100644 index 00000000000..fbc09700fe6 --- /dev/null +++ b/tests/queries/0_stateless/02491_part_log_has_table_uuid.reference @@ -0,0 +1,4 
@@ +1 NewPart NotAMerge all_1_1_0 +1 MergeParts RegularMerge all_1_1_1 +1 NewPart NotAMerge all_1_1_2 +1 RemovePart NotAMerge all_1_1_1 diff --git a/tests/queries/0_stateless/02491_part_log_has_table_uuid.sql b/tests/queries/0_stateless/02491_part_log_has_table_uuid.sql new file mode 100644 index 00000000000..1d18962443c --- /dev/null +++ b/tests/queries/0_stateless/02491_part_log_has_table_uuid.sql @@ -0,0 +1,22 @@ +-- Tags: no-ordinary-database + +create table data_02491 (key Int) engine=MergeTree() order by tuple(); +insert into data_02491 values (1); +optimize table data_02491 final; +truncate table data_02491; + +system flush logs; +with (select uuid from system.tables where database = currentDatabase() and table = 'data_02491') as table_uuid_ +select + table_uuid != toUUIDOrDefault(Null), + event_type, + merge_reason, + part_name +from system.part_log +where + database = currentDatabase() and + table = 'data_02491' and + table_uuid = table_uuid_ +order by event_time_microseconds; + +drop table data_02491; diff --git a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh new file mode 100755 index 00000000000..00e7a426fd0 --- /dev/null +++ b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# This is a regression test for clickhouse-local, which may use an already freed +# context from the suggestion thread on error. + +grep_options=( + -e ^$ + -e 'Cannot create file: /no/such/directory' + -e 'Cannot load data for command line suggestions:' + -e 'ClickHouse local version' +) + +ASAN_OPTIONS=$ASAN_OPTIONS:exitcode=3 $CLICKHOUSE_LOCAL --history_file /no/such/directory |& grep -v "${grep_options[@]}" +# on a sanitizer error the exit code will be 3 instead of 1 +echo $? 
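Aside: the filter in 02481_clickhouse_local_context_uaf.sh works by expanding a Bash array of -e options into a single grep -v call, so several expected output lines are removed at once and anything unexpected both prints and flips the exit status. A minimal, self-contained sketch of that idiom follows; the array name, patterns, and sample input here are illustrative only and are not taken from the test:

    #!/usr/bin/env bash
    # Illustrative sketch only: filter several expected lines with one grep -v call.
    expected_noise=(
        -e '^$'
        -e 'ClickHouse local version'
    )
    # Only "unexpected error" survives the filter; grep exits 0 when at least one
    # line is printed and 1 when every line is filtered out.
    printf '%s\n' 'ClickHouse local version 22.11.1.1' 'unexpected error' \
        | grep -v "${expected_noise[@]}"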
diff --git a/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.reference b/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.reference new file mode 100644 index 00000000000..eccf51501ed --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.reference @@ -0,0 +1,77 @@ +QUERY id: 0 + PROJECTION COLUMNS + sumIf(1, equals(modulo(number, 2), 0)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: countIf, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 9, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8 +-- +5 +-- +QUERY id: 0 + PROJECTION COLUMNS + sum(if(equals(modulo(number, 2), 0), 1, 0)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: countIf, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 9, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_10, constant_value_type: UInt8 +-- +5 +-- +QUERY id: 0 + PROJECTION COLUMNS + sum(if(equals(modulo(number, 2), 0), 0, 1)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: countIf, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + CONSTANT id: 13, constant_value: UInt64_0, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 11, table_function_name: numbers + ARGUMENTS + LIST id: 14, nodes: 1 + CONSTANT id: 15, constant_value: UInt64_10, constant_value_type: UInt8 +-- +5 diff --git a/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.sql b/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.sql new file mode 100644 index 00000000000..f1dbfa1f32a --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_sum_if_to_count_if.sql @@ -0,0 +1,24 @@ +SET allow_experimental_analyzer = 1; +SET optimize_rewrite_sum_if_to_count_if = 1; + +EXPLAIN QUERY TREE (SELECT sumIf(1, (number % 2) == 0) FROM numbers(10)); + +SELECT 
'--'; + +SELECT sumIf(1, (number % 2) == 0) FROM numbers(10); + +SELECT '--'; + +EXPLAIN QUERY TREE (SELECT sum(if((number % 2) == 0, 1, 0)) FROM numbers(10)); + +SELECT '--'; + +SELECT sum(if((number % 2) == 0, 1, 0)) FROM numbers(10); + +SELECT '--'; + +EXPLAIN QUERY TREE (SELECT sum(if((number % 2) == 0, 0, 1)) FROM numbers(10)); + +SELECT '--'; + +SELECT sum(if((number % 2) == 0, 0, 1)) FROM numbers(10); diff --git a/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.reference b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.reference new file mode 100644 index 00000000000..c9a8d73701d --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.reference @@ -0,0 +1,28 @@ +0 +-- +0 +1 +-- +1 +2 +-- +(1) 0 +-- +(0,1) 0 +-- +(1,2) 1 +(1,2) 2 +-- +(1) 0 +-- +(0,1) 0 +-- +(1,2) 1 +(1,2) 2 +-- +('1') 0 +-- +('0','1') 0 +-- +('1','2') 1 +('1','2') 2 diff --git a/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.sql b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.sql new file mode 100644 index 00000000000..bdbe65c643b --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.sql @@ -0,0 +1,47 @@ +SET allow_experimental_analyzer = 1; + +SELECT number FROM numbers(untuple(tuple(1))); + +SELECT '--'; + +SELECT number FROM numbers(untuple(tuple(0, 2))); + +SELECT '--'; + +SELECT number FROM numbers(untuple(tuple(1, 2))); + +SELECT '--'; + +SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(untuple(value)); + +SELECT '--'; + +SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value)); + +SELECT '--'; + +SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value)); + +SELECT '--'; + +SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(value.*); + +SELECT '--'; + +SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*); + +SELECT '--'; + +SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*); + +SELECT '--'; + +SELECT cast(tuple('1'), 'Tuple(value String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x)); + +SELECT '--'; + +SELECT cast(tuple('0', '1'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x)); + +SELECT '--'; + +SELECT cast(tuple('1', '2'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x)); diff --git a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference new file mode 100644 index 00000000000..ba7786789b4 --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference @@ -0,0 +1,15 @@ +QUERY id: 0 + PROJECTION COLUMNS + uniqCombined(tuple(\'\')) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniqCombined, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + CONSTANT id: 4, constant_value: \'\', constant_value_type: String + JOIN TREE + TABLE_FUNCTION id: 5, table_function_name: numbers + ARGUMENTS + LIST id: 6, nodes: 1 + CONSTANT id: 7, constant_value: UInt64_1, constant_value_type: UInt8 +1 diff --git a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql 
b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql new file mode 100644 index 00000000000..830db274678 --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql @@ -0,0 +1,5 @@ +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE SELECT uniqCombined(tuple('')) FROM numbers(1); + +SELECT uniqCombined(tuple('')) FROM numbers(1); diff --git a/tests/queries/0_stateless/02493_do_not_assume_that_the_original_query_was_valid_when_transforming_joins.reference b/tests/queries/0_stateless/02493_do_not_assume_that_the_original_query_was_valid_when_transforming_joins.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02493_do_not_assume_that_the_original_query_was_valid_when_transforming_joins.sql b/tests/queries/0_stateless/02493_do_not_assume_that_the_original_query_was_valid_when_transforming_joins.sql new file mode 100644 index 00000000000..6df5623638d --- /dev/null +++ b/tests/queries/0_stateless/02493_do_not_assume_that_the_original_query_was_valid_when_transforming_joins.sql @@ -0,0 +1,26 @@ +CREATE TABLE table1 (column1 String) ENGINE=MergeTree() ORDER BY tuple(); +CREATE TABLE table2 (column1 String, column2 String, column3 String) ENGINE=MergeTree() ORDER BY tuple(); +CREATE TABLE table3 (column3 String) ENGINE=MergeTree() ORDER BY tuple(); + +SELECT + * +FROM +( + SELECT + column1 + FROM table1 + GROUP BY + column1 +) AS a +ANY LEFT JOIN +( + SELECT + * + FROM table2 +) AS b ON (b.column1 = a.column1) AND (b.column2 = a.column2) +ANY LEFT JOIN +( + SELECT + * + FROM table3 +) AS c ON c.column3 = b.column3; -- {serverError UNKNOWN_IDENTIFIER} diff --git a/tests/queries/0_stateless/data_parquet/int-list-zero-based-chunked-array.parquet b/tests/queries/0_stateless/data_parquet/int-list-zero-based-chunked-array.parquet new file mode 100644 index 00000000000..2eb3ba3ab15 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/int-list-zero-based-chunked-array.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/list_monotonically_increasing_offsets.parquet b/tests/queries/0_stateless/data_parquet/list_monotonically_increasing_offsets.parquet new file mode 100644 index 00000000000..1c23e27db65 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/list_monotonically_increasing_offsets.parquet differ diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries index ab73e97b96e..228dccfcb5b 100644 --- a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries +++ b/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries @@ -3,7 +3,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -18,7 +18,7 @@ SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM 
system.filesystem_cache; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy_3', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries b/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries index 76aebfcaca3..bd185942e6c 100644 --- a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries +++ b/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries @@ -3,7 +3,7 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size, state diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries index 386a1792ea4..56a8710cc93 100644 --- a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries +++ b/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries @@ -6,7 +6,7 @@ SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100000); diff --git a/tests/queries/0_stateless/parts.lib b/tests/queries/0_stateless/parts.lib new file mode 100644 index 00000000000..c35f996ffed --- /dev/null +++ b/tests/queries/0_stateless/parts.lib @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +function wait_for_delete_empty_parts() +{ + local table=$1 + local database=${2:-$CLICKHOUSE_DATABASE} + local timeout=${3:-20} + + while [[ timeout -gt 0 ]] + do + res=$(${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.parts WHERE database='$database' AND table='$table' AND active AND rows=0") + [[ $res -eq 0 ]] && return 0 + + sleep 2 + timeout=$((timeout - 2)) + done + + echo "Timed out while waiting for delete empty parts!" 
>&2 + return 2 +} + +function wait_for_delete_inactive_parts() +{ + local table=$1 + local database=${2:-$CLICKHOUSE_DATABASE} + local timeout=${3:-20} + + while [[ timeout -gt 0 ]] + do + res=$(${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.parts WHERE database='$database' AND table='$table' AND not active") + [[ $res -eq 0 ]] && return 0 + + sleep 2 + timeout=$((timeout - 2)) + done + + echo "Timed out while waiting for delete inactive parts!" >&2 + return 2 +} diff --git a/tests/queries/1_stateful/00152_insert_different_granularity.sql b/tests/queries/1_stateful/00152_insert_different_granularity.sql index 6415cdad8a5..294d71b384b 100644 --- a/tests/queries/1_stateful/00152_insert_different_granularity.sql +++ b/tests/queries/1_stateful/00152_insert_different_granularity.sql @@ -32,7 +32,12 @@ ALTER TABLE test.hits ATTACH PARTITION 201403; DROP TABLE IF EXISTS hits_copy; -CREATE TABLE hits_copy (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, 
`UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, index_granularity_bytes=0, min_bytes_for_wide_part = 0; +CREATE TABLE hits_copy (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` 
String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) + ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS index_granularity=8192, min_bytes_for_wide_part = 0; ALTER TABLE hits_copy REPLACE PARTITION 201403 FROM test.hits; diff --git a/tests/queries/1_stateful/00176_bson_parallel_parsing.reference b/tests/queries/1_stateful/00176_bson_parallel_parsing.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/1_stateful/00176_bson_parallel_parsing.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/1_stateful/00176_bson_parallel_parsing.sh b/tests/queries/1_stateful/00176_bson_parallel_parsing.sh new file mode 100755 index 00000000000..8c021e8d3f6 --- /dev/null +++ b/tests/queries/1_stateful/00176_bson_parallel_parsing.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Tags: disabled + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_bson" +$CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_bson(WatchID UInt64, ClientIP6 FixedString(16), EventTime DateTime, Title String) ENGINE=Memory()" + + +$CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --output_format_parallel_formatting=false -q \ +"SELECT WatchID, ClientIP6, EventTime, Title FROM test.hits ORDER BY UserID LIMIT 30000 Format BSONEachRow" > 00176_data.bson + +cat 00176_data.bson | $CLICKHOUSE_CLIENT --max_threads=0 --input_format_parallel_parsing=false -q "INSERT INTO parsing_bson FORMAT BSONEachRow" + +checksum1=$($CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_bson ORDER BY WatchID;" | md5sum) +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE parsing_bson;" + +cat 00176_data.bson | $CLICKHOUSE_CLIENT --max_threads=0 --max_insert_block_size=5000 --input_format_parallel_parsing=true -q "INSERT INTO parsing_bson FORMAT BSONEachRow" + +checksum2=$($CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_bson ORDER BY WatchID;" | md5sum) + + +if [[ "$checksum1" == "$checksum2" ]]; +then + echo "OK" +else + echo "FAIL" +fi + +$CLICKHOUSE_CLIENT -q "DROP TABLE parsing_bson" + +rm 00176_data.bson + diff --git a/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.reference b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.reference new file mode 100644 index 00000000000..f599e28b8ab --- /dev/null +++ b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.sql b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.sql new file mode 100644 index 00000000000..6397d7f5a28 --- /dev/null +++ b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.sql @@ -0,0 +1,11 @@ +SELECT count() +FROM +( + SELECT DISTINCT + Title, + SearchPhrase + FROM test.hits + WHERE (SearchPhrase != '') AND (NOT match(Title, '[а-яА-ЯёЁ]')) AND (NOT match(SearchPhrase, '[а-яА-ЯёЁ]')) + LIMIT 1 BY Title + LIMIT 10 +); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 63bf2c37ee3..25781190d6a 100644 --- 
a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -9,6 +9,9 @@ AddressSanitizer AppleClang ArrowStream AvroConfluent +BSON +BSONEachRow +Bool CCTOOLS CLion CMake @@ -95,6 +98,7 @@ NEKUDOTAYIM NULLIF NVME NuRaft +ObjectId Ok OpenSUSE OpenStack @@ -190,6 +194,8 @@ bools boringssl brotli buildable +bson +bsoneachrow camelCase capn capnproto @@ -450,6 +456,7 @@ subquery subseconds substring subtree +subtype sudo symlink symlinks @@ -482,6 +489,7 @@ userspace userver utils uuid +uint variadic varint vectorized diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index f331e222541..9c26f322c8e 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -23,3 +23,4 @@ hastable nam ubuntu toolchain +vie diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 47dbec5a5f8..2c1061c3333 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,11 @@ +v22.11.1.1360-stable 2022-11-17 v22.10.2.11-stable 2022-11-01 v22.10.1.1877-stable 2022-10-26 v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.9.24-lts 2022-11-19 v22.8.8.3-lts 2022-10-27 v22.8.7.34-lts 2022-10-26 v22.8.6.71-lts 2022-09-30 diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh deleted file mode 100755 index 15933da7942..00000000000 --- a/utils/security-generator/SECURITY.md.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -# This is a script to automate the SECURITY.md generation in the repository root. -# The logic is the following: -# We support the latest ClickHouse Y.M stable release, -# the two releases before the latest stable, -# and the two latest LTS releases (which may be already included by the criteria above). -# The LTS releases are every Y.3 and Y.8 stable release. - -echo " -# Security Policy - -## Security Announcements -Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). - -## Scope and Supported Versions - -The following versions of ClickHouse server are currently being supported with security updates: -" - -clickhouse-local --query " -SELECT - y::String || '.' || (y < toYear(today()) - 2000 - 1 ? '*' : m::String) AS Version, - (n <= 3 OR (is_lts AND lts_n <= 2)) ? '✔️' : '❌' AS Supported -FROM -( - SELECT - y, - m, - count() OVER (ORDER BY y DESC, m DESC) AS n, - m IN (3, 8) AS is_lts, - countIf(is_lts) OVER (ORDER BY y DESC, m DESC) AS lts_n - FROM - ( - WITH - extractGroups(version, 'v(\\d+)\\.(\\d+)') AS v, - v[1]::UInt8 AS y, - v[2]::UInt8 AS m - SELECT - y, - m - FROM file('$(dirname "${BASH_SOURCE[0]}")/../list-versions/version_date.tsv', TSV, 'version String, date String') - ORDER BY - y DESC, - m DESC - LIMIT 1 BY - y, - m - ) -) -LIMIT 1 BY Version -FORMAT Markdown" - -echo " -## Reporting a Vulnerability - -We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. - -To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. 
Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement. - -### When Should I Report a Vulnerability? - -- You think you discovered a potential security vulnerability in ClickHouse -- You are unsure how a vulnerability affects ClickHouse - -### When Should I NOT Report a Vulnerability? - -- You need help tuning ClickHouse components for security -- You need help applying security related updates -- Your issue is not security related - -## Security Vulnerability Response - -Each report is acknowledged and analyzed by ClickHouse maintainers within 5 working days. -As the security issue moves from triage, to identified fix, to release planning we will keep the reporter updated. - -## Public Disclosure Timing - -A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. -" diff --git a/utils/security-generator/generate_security.py b/utils/security-generator/generate_security.py new file mode 100755 index 00000000000..4835a60d152 --- /dev/null +++ b/utils/security-generator/generate_security.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +from pathlib import Path +from typing import List + +VERSIONS_FILE = ( + Path(__file__).absolute().parent.parent / "list-versions" / "version_date.tsv" +) + +HEADER = """ + +# Security Policy + +## Security Announcements +Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). + +## Scope and Supported Versions + +The following versions of ClickHouse server are currently being supported with security updates: +""" + +FOOTER = """## Reporting a Vulnerability + +We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. + +To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement. + +### When Should I Report a Vulnerability? + +- You think you discovered a potential security vulnerability in ClickHouse +- You are unsure how a vulnerability affects ClickHouse + +### When Should I NOT Report a Vulnerability? + +- You need help tuning ClickHouse components for security +- You need help applying security related updates +- Your issue is not security related + +## Security Vulnerability Response + +Each report is acknowledged and analyzed by ClickHouse maintainers within 5 working days. +As the security issue moves from triage, to identified fix, to release planning we will keep the reporter updated. 
+ +## Public Disclosure Timing + +A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. +""" + + +def generate_supported_versions(): + with open(VERSIONS_FILE, "r", encoding="utf-8") as fd: + versions = [line.split(maxsplit=1)[0][1:] for line in fd.readlines()] + + # The versions in VERSIONS_FILE are ordered ascending, so the first one is + # the greatest one. We may have supported versions in the previous year + unsupported_year = int(versions[0].split(".", maxsplit=1)[0]) - 2 + # 3 supported versions + supported = [] # type: List[str] + # 2 LTS versions, one of them could be in supported + lts = [] # type: List[str] + # The rest are unsupported + unsupported = [] # type: List[str] + table = [ + "| Version | Supported |", + "|:-|:-|", + ] + for version in versions: + year = int(version.split(".")[0]) + month = int(version.split(".")[1]) + version = f"{year}.{month}" + if version in supported or version in lts: + continue + if len(supported) < 3: + supported.append(version) + if len(lts) < 2 and month in [3, 8]: + # The version can be LTS as well + lts.append(version) + table.append(f"| {version} | ✔️ |") + continue + if len(lts) < 2 and month in [3, 8]: + lts.append(version) + table.append(f"| {version} | ✔️ |") + continue + if year <= unsupported_year: + # The whole year is unsupported + version = f"{year}.*" + if version not in unsupported: + unsupported.append(version) + table.append(f"| {version} | ❌ |") + + return "\n".join(table) + "\n" + + +def main(): + print(HEADER) + print(generate_supported_versions()) + print(FOOTER) + + +if __name__ == "__main__": + main()
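Usage note (an assumption, not part of this diff): like the SECURITY.md.sh it replaces, the new generator only prints the policy to stdout, so regenerating the root SECURITY.md is presumably done by redirecting its output. A hypothetical invocation from the repository root:

    # Hypothetical invocation; the exact CI/release wiring is not shown in this diff.
    python3 utils/security-generator/generate_security.py > SECURITY.md
    git diff SECURITY.md  # review the regenerated support table before committing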