Merge branch 'master' into classification

2024-11-21 15:12:02 +00:00 · 2021-12-30 02:17:47 +00:00 · 2021-12-30 02:17:47 +00:00 · 2bdee0b0b8
commit 2bdee0b0b8
parent ba0083736e e879aca58b
1447 changed files with 41754 additions and 14972 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -1,4 +1,2 @@
 contrib/* linguist-vendored
 *.h linguist-language=C++
-# to avoid frequent conflicts
-tests/queries/0_stateless/arcadia_skip_list.txt text merge=union
--- a/.github/workflows/anchore-analysis.yml
+++ b/.github/workflows/anchore-analysis.yml
@ -8,6 +8,10 @@

 name: Docker Container Scan (clickhouse-server)

+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
 "on":
  pull_request:
    paths:
--- a/.github/workflows/backport.yml
+++ b/.github/workflows/backport.yml
@ -1,4 +1,9 @@
 name: CherryPick
+
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
 concurrency:
  group: cherry-pick
 on: # yamllint disable-line rule:truthy
@ -8,18 +13,24 @@ jobs:
  CherryPick:
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Set envs
+        # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/cherry_pick
+          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
+          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
+          RCSK
+          REPO_OWNER=ClickHouse
+          REPO_NAME=ClickHouse
+          REPO_TEAM=core
+          EOF
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
          token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
          fetch-depth: 0
      - name: Cherry pick
-        env:
-          TEMP_PATH: ${{runner.temp}}/cherry_pick
-          ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
-          REPO_OWNER: "ClickHouse"
-          REPO_NAME: "ClickHouse"
-          REPO_TEAM: "core"
        run: |
          sudo pip install GitPython
          cd $GITHUB_WORKSPACE/tests/ci
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@ -1,4 +1,9 @@
 name: BackportPR
+
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
 on: # yamllint disable-line rule:truthy
  push:
    branches:
@ -7,6 +12,9 @@ jobs:
  DockerHubPush:
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
@ -22,17 +30,23 @@ jobs:
    needs: [BuilderDebRelease]
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/compatibility_check
+          REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          EOF
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
      - name: CompatibilityCheck
-        env:
-          TEMP_PATH: ${{runner.temp}}/compatibility_check
-          REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
@ -51,154 +65,182 @@ jobs:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_release
+          EOF
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
-          submodules: 'recursive'
+          submodules: 'true'
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_release'
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
        with:
          name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
      - name: Cleanup
        if: always()
        run: |
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
  BuilderDebAsan:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_asan
+          EOF
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
-          submodules: 'recursive'
+          submodules: 'true'
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_asan'
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
        with:
          name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
      - name: Cleanup
        if: always()
        run: |
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
  BuilderDebTsan:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_tsan
+          EOF
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
-          submodules: 'recursive'
+          submodules: 'true'
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_tsan'
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
        with:
          name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
      - name: Cleanup
        if: always()
        run: |
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
  BuilderDebDebug:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_debug
+          EOF
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
-          submodules: 'recursive'
+          submodules: 'true'
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_debug'
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
        with:
          name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
      - name: Cleanup
        if: always()
        run: |
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
 ############################################################################################
 ##################################### BUILD REPORTER #######################################
 ############################################################################################
@ -207,22 +249,26 @@ jobs:
      - BuilderDebRelease
      - BuilderDebAsan
      - BuilderDebTsan
-      - BuilderDebUBsan
-      - BuilderDebMsan
      - BuilderDebDebug
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/report_check
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=ClickHouse build check (actions)
+          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Report Builder
-        env:
-          TEMP_PATH: ${{runner.temp}}/report_check
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'ClickHouse build check (actions)'
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
@ -241,19 +287,25 @@ jobs:
    needs: [BuilderDebAsan]
    runs-on: [self-hosted, func-tester]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stateless_debug
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stateless tests (address, actions)
+          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
+          KILL_TIMEOUT=10800
+          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
-        env:
-          TEMP_PATH: ${{runner.temp}}/stateless_debug
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Stateless tests (address, actions)'
-          REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
-          KILL_TIMEOUT: 10800
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
@ -273,19 +325,25 @@ jobs:
    needs: [BuilderDebDebug]
    runs-on: [self-hosted, func-tester]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stateful_debug
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stateful tests (debug, actions)
+          REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
+          KILL_TIMEOUT=3600
+          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
-        env:
-          TEMP_PATH: ${{runner.temp}}/stateful_debug
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Stateful tests (debug, actions)'
-          REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
-          KILL_TIMEOUT: 3600
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
@ -303,20 +361,30 @@ jobs:
 ##############################################################################################
  StressTestTsan:
    needs: [BuilderDebTsan]
-    runs-on: [self-hosted, stress-tester]
+    # func testers have 16 cores + 128 GB memory
+    # while stress testers have 36 cores + 72 GB memory
+    # It would be better to have something like 32 + 128,
+    # but such servers are almost unavailable as spot instances.
+    runs-on: [self-hosted, func-tester]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stress_thread
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stress test (thread, actions)
+          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
+          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Stress test
-        env:
-          TEMP_PATH: ${{runner.temp}}/stress_thread
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Stress test (thread, actions)'
-          REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
@ -333,21 +401,27 @@ jobs:
 ############################# INTEGRATION TESTS #############################################
 #############################################################################################
  IntegrationTestsRelease:
-    needs: [BuilderDebRelease, FunctionalStatelessTestRelease]
+    needs: [BuilderDebRelease]
    runs-on: [self-hosted, stress-tester]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/integration_tests_release
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Integration tests (release, actions)
+          REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
+          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Integration test
-        env:
-          TEMP_PATH: ${{runner.temp}}/integration_tests_release
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Integration tests (release, actions)'
-          REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
@ -371,6 +445,9 @@ jobs:
      - CompatibilityCheck
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Finish label
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@ -1,4 +1,9 @@
 name: Cancel
+
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
 on: # yamllint disable-line rule:truthy
  workflow_run:
    workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@ -1,4 +1,9 @@
 name: DocsCheck
+
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
 on: # yamllint disable-line rule:truthy
  pull_request:
    types:
@ -14,6 +19,9 @@ jobs:
  CheckLabels:
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Labels check
@ -24,6 +32,9 @@ jobs:
    needs: CheckLabels
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
@ -39,17 +50,23 @@ jobs:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/docs_check
+          REPO_COPY=${{runner.temp}}/docs_check/ClickHouse
+          EOF
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/docs_check
+          path: ${{ env.TEMP_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Docs Check
-        env:
-          TEMP_PATH: ${{runner.temp}}/docs_check
-          REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
        run: |
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci
--- a/.github/workflows/jepsen.yml
+++ b/.github/workflows/jepsen.yml
@ -0,0 +1,44 @@
+name: JepsenWorkflow
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+concurrency:
+  group: jepsen
+on: # yamllint disable-line rule:truthy
+  schedule:
+    - cron: '0 */6 * * *'
+  workflow_run:
+    workflows: ["CIGithubActions"]
+    types:
+      - completed
+  workflow_dispatch:
+jobs:
+  KeeperJepsenRelease:
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/keeper_jepsen
+          REPO_COPY=${{runner.temp}}/keeper_jepsen/ClickHouse
+          EOF
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Jepsen Test
+        run: |
+          sudo rm -fr $TEMP_PATH
+          mkdir -p $TEMP_PATH
+          cp -r $GITHUB_WORKSPACE $TEMP_PATH
+          cd $REPO_COPY/tests/ci
+          python3 keeper_jepsen_check.py
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr $TEMP_PATH
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -1,4 +1,9 @@
 name: DocsReleaseChecks
+
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
 concurrency:
  group: master-release
  cancel-in-progress: true
@ -11,10 +16,15 @@ on: # yamllint disable-line rule:truthy
      - 'website/**'
      - 'benchmark/**'
      - 'docker/**'
+      - '.github/**'
+  workflow_dispatch:
 jobs:
  DockerHubPush:
    runs-on: [self-hosted, style-checker]
    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
@ -30,20 +40,31 @@ jobs:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
    steps:
+      - name: Set envs
+        # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/docs_release
+          REPO_COPY=${{runner.temp}}/docs_release/ClickHouse
+          CLOUDFLARE_TOKEN=${{secrets.CLOUDFLARE}}
+          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
+          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
+          RCSK
+          EOF
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
-          path: ${{runner.temp}}/docs_release
+          path: ${{ env.TEMP_PATH }}
      - name: Docs Release
-        env:
-          TEMP_PATH: ${{runner.temp}}/docs_release
-          REPO_COPY: ${{runner.temp}}/docs_release/ClickHouse
-          CLOUDFLARE_TOKEN: ${{secrets.CLOUDFLARE}}
-          ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
        run: |
+          sudo rm -fr $TEMP_PATH
+          mkdir -p $TEMP_PATH
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci
          python3 docs_release.py
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
--- a/.github/workflows/woboq.yml
+++ b/.github/workflows/woboq.yml
@ -0,0 +1,42 @@
+name: WoboqBuilder
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
+concurrency:
+  group: woboq
+on: # yamllint disable-line rule:truthy
+  schedule:
+    - cron: '0 */18 * * *'
+  workflow_dispatch:
+jobs:
+  # don't use dockerhub push because this image updates so rarely
+  WoboqCodebrowser:
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/codebrowser
+          REPO_COPY=${{runner.temp}}/codebrowser/ClickHouse
+          IMAGES_PATH=${{runner.temp}}/images_path
+          EOF
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+      - name: Codebrowser
+        run: |
+          sudo rm -fr $TEMP_PATH
+          mkdir -p $TEMP_PATH
+          cp -r $GITHUB_WORKSPACE $TEMP_PATH
+          cd $REPO_COPY/tests/ci && python3 codebrowser_check.py
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr $TEMP_PATH
--- a/.gitmodules
+++ b/.gitmodules
@ -54,8 +54,8 @@
 	url = https://github.com/ClickHouse-Extras/Turbo-Base64.git
 [submodule "contrib/arrow"]
 	path = contrib/arrow
-	url = https://github.com/ClickHouse-Extras/arrow
-	branch = clickhouse-arrow-2.0.0
+	url = https://github.com/ClickHouse-Extras/arrow.git
+	branch = blessed/release-6.0.1
 [submodule "contrib/thrift"]
 	path = contrib/thrift
 	url = https://github.com/apache/thrift.git
@ -190,8 +190,8 @@
 	url = https://github.com/xz-mirror/xz
 [submodule "contrib/abseil-cpp"]
 	path = contrib/abseil-cpp
-	url = https://github.com/ClickHouse-Extras/abseil-cpp.git
-	branch = lts_2020_02_25
+	url = https://github.com/abseil/abseil-cpp.git
+	branch = lts_2021_11_02
 [submodule "contrib/dragonbox"]
 	path = contrib/dragonbox
 	url = https://github.com/ClickHouse-Extras/dragonbox.git
@ -253,3 +253,6 @@
 [submodule "contrib/nlp-data"]
 	path = contrib/nlp-data
 	url = https://github.com/evillique/nlp-data.git
+[submodule "contrib/azure"]
+	path = contrib/azure
+	url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,3 +1,181 @@
+### ClickHouse release v21.12, 2021-12-15
+
+#### Backward Incompatible Change
+
+* *A fix for a feature that previously had unwanted behaviour.* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. Direct select will not be allowed even if enabled by setting, in case there is an attached materialized view. For Kafka and RabbitMQ, direct select, if allowed, will not commit messages by default. To enable commits with direct select, user must use storage level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* *A slight change in behaviour of a new function.* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* *Setting rename.* Add custom null representation support for TSV/CSV input formats. Fix deserializing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)).
+* *Further deprecation of already unused code.* This is relevant only for users of ClickHouse versions older than 20.6. A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then server will fail to start after upgrade. Stop replicas with old version to make new version start. After that it will not be possible to downgrade to version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)).
+
+#### New Feature
+
+* Implemented more of the ZooKeeper Four Letter Words commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). Now `clickhouse-keeper` is feature complete.
+* Support for `Bool` data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)).
+* Support for `PARTITION BY` in File, URL, HDFS storages and with `INSERT INTO` table function. Closes [#30273](https://github.com/ClickHouse/ClickHouse/issues/30273). [#30690](https://github.com/ClickHouse/ClickHouse/pull/30690) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added `CONSTRAINT ... ASSUME ...` (without checking during `INSERT`). Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization. Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... later). Added ability to replace heavy columns with light columns if it's possible. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)).
+* Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)).
+* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)).
+* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)).
+* *TLDR: Major improvements of completeness and consistency of text formats.* Refactor formats `TSV`, `TSVRaw`, `CSV` and `JSONCompactEachRow`, `JSONCompactStringsEachRow`, remove code duplication, add base interface for formats with `-WithNames` and `-WithNamesAndTypes` suffixes. Add formats `CSVWithNamesAndTypes`, `TSVRawWithNames`, `TSVRawWithNamesAndTypes`, `JSONCompactEachRowWithNames`, `JSONCompactStringsEachRowWithNames`, `RowBinaryWithNames`. Support parallel parsing for formats `TSVWithNamesAndTypes`, `TSVRaw(WithNames/WithNamesAndTypes)`, `CSVWithNamesAndTypes`, `JSONCompactEachRow(WithNames/WithNamesAndTypes)`, `JSONCompactStringsEachRow(WithNames/WithNamesAndTypes)`. Support columns mapping and types checking for `RowBinaryWithNamesAndTypes` format. Add setting `input_format_with_types_use_header` which specifies whether we should check that the types written in <format_name>`WithNamesAndTypes` format match the table structure. Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should not control `csv_empty_as_default`). Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields). Fix Nullable input/output in `TSVRaw` format, make this format fully compatible with inserting into TSV. Fix inserting NULLs in `LowCardinality(Nullable)` when `input_format_null_as_default` is enabled (previously default values were inserted instead of actual NULLs). Fix strings deserialization in `JSONStringsEachRow`/`JSONCompactStringsEachRow` formats (strings were parsed just until first '\n' or '\t'). Add ability to use `Raw` escaping rule in Template input format. Add diagnostic info for JSONCompactEachRow(WithNames/WithNamesAndTypes) input format.
Fix bug with parallel parsing of `-WithNames` formats in case when setting `min_chunk_bytes_for_parallel_parsing` is less than bytes in a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). Allow to print/parse names and types of columns in `CustomSeparated` input/output format. Add formats `CustomSeparatedWithNames/WithNamesAndTypes` similar to `TSVWithNames/WithNamesAndTypes`. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)).
+* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)).
+* Exposes all settings of the global thread pool in the configuration file. [#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)).
+* Introduced window functions `exponentialTimeDecayedSum`, `exponentialTimeDecayedMax`, `exponentialTimeDecayedCount` and `exponentialTimeDecayedAvg` which are more effective than `exponentialMovingAverage` for bigger windows. Also more use-cases were covered. [#29799](https://github.com/ClickHouse/ClickHouse/pull/29799) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add option to compress logs before writing them to a file using LZ4. Closes [#23860](https://github.com/ClickHouse/ClickHouse/issues/23860). [#29219](https://github.com/ClickHouse/ClickHouse/pull/29219) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Support `JOIN ON 1 = 1` that has CROSS JOIN semantics. This closes [#25578](https://github.com/ClickHouse/ClickHouse/issues/25578). [#25894](https://github.com/ClickHouse/ClickHouse/pull/25894) ([Vladimir C](https://github.com/vdimir)).
+* Add Map combinator for `Map` type. Rename old `sum-, min-, max- Map` for mapped arrays to `sum-, min-, max- MappedArrays`. [#24539](https://github.com/ClickHouse/ClickHouse/pull/24539) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Experimental Feature
+
+* `WINDOW VIEW` to enable stream processing in ClickHouse. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)).
+* Drop support for using Ordinary databases with `MaterializedMySQL`. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)).
+* Implement the commands BACKUP and RESTORE for the Log family. This feature is under development. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Performance Improvement
+
+* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)).
+* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)).
+* Speed up count over nullable columns. [#31806](https://github.com/ClickHouse/ClickHouse/pull/31806) ([Raúl Marín](https://github.com/Algunenano)).
+* Speed up `avg` and `sumCount` aggregate functions. [#31694](https://github.com/ClickHouse/ClickHouse/pull/31694) ([Raúl Marín](https://github.com/Algunenano)).
+* Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of syncing data to block device. This closes [#31181](https://github.com/ClickHouse/ClickHouse/issues/31181). [#31229](https://github.com/ClickHouse/ClickHouse/pull/31229) ([zhanglistar](https://github.com/zhanglistar)).
+* Fixing query performance issue in `LiveView` tables. Fixes [#30831](https://github.com/ClickHouse/ClickHouse/issues/30831). [#31006](https://github.com/ClickHouse/ClickHouse/pull/31006) ([vzakaznikov](https://github.com/vzakaznikov)).
+* Speed up query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional `rule_type` field). [#25122](https://github.com/ClickHouse/ClickHouse/pull/25122) ([Michail Safronov](https://github.com/msaf1980)).
+* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)` (i.e. identifier as the db.table instead of string) there was excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)).
+* Optimize function `tupleElement` to reading of subcolumn with enabled setting `optimize_functions_to_subcolumns`. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)).
+* Optimize function `mapContains` to reading of subcolumn `key` with enabled settings `optimize_functions_to_subcolumns`. [#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)).
+* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Skipping mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)).
+
+#### Improvement
+
+* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([tavplubix](https://github.com/tavplubix)).
+* Allow versioning of aggregate function states. Now we can introduce backward compatible changes in serialization format of aggregate function states. Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support PostgreSQL style `ALTER MODIFY COLUMN` syntax. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)).
+* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)).
+* The `murmurHash3_128` and `sipHash128` functions now accept an arbitrary number of arguments. This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)).
+* Support default expression for `HDFS` storage and optimize fetching when source is column oriented. [#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)).
+* Improve the operation name of an opentelemetry span. [#32234](https://github.com/ClickHouse/ClickHouse/pull/32234) ([Frank Chen](https://github.com/FrankChen021)).
+* Use `Content-Type: application/x-ndjson` (http://ndjson.org/) for output format `JSONEachRow`. [#32223](https://github.com/ClickHouse/ClickHouse/pull/32223) ([Dmitriy Dorofeev](https://github.com/deem0n)).
+* Improve skipping unknown fields with quoted escaping rule in Template/CustomSeparated formats. Previously you could skip only quoted strings, now you can skip values with any type. [#32204](https://github.com/ClickHouse/ClickHouse/pull/32204) ([Kruglov Pavel](https://github.com/Avogar)).
+* Now `clickhouse-keeper` refuses to start or apply configuration changes when they contain duplicated IDs or endpoints. Fixes [#31339](https://github.com/ClickHouse/ClickHouse/issues/31339). [#32121](https://github.com/ClickHouse/ClickHouse/pull/32121) ([alesapin](https://github.com/alesapin)).
+* Set Content-Type in HTTP packets issued from URL engine. [#32113](https://github.com/ClickHouse/ClickHouse/pull/32113) ([Frank Chen](https://github.com/FrankChen021)).
+* Return Content-Type as 'application/json' for `JSONEachRow` format if `output_format_json_array_of_rows` is enabled. [#32112](https://github.com/ClickHouse/ClickHouse/pull/32112) ([Frank Chen](https://github.com/FrankChen021)).
+* Allow to parse `+` before `Float32`/`Float64` values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow a user configured `hdfs_replication` parameter for `DiskHDFS` and `StorageHDFS`. Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)).
+* Added ClickHouse `exception` and `exception_code` fields to opentelemetry span log. [#32040](https://github.com/ClickHouse/ClickHouse/pull/32040) ([Frank Chen](https://github.com/FrankChen021)).
+* Improve opentelemetry span log duration - it was zero at the query level if there was a query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)).
+* Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Recreate `system.*_log` tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)).
+* `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)).
+* ClickHouse dictionary source: support predefined connections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow to use predefined connections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)) (author: Amos Bird).
+* Add key bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)).
+* Improve the `max_execution_time` checks. Fixed some cases when timeout checks do not happen and query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)).
+* Better exception message when `users.xml` cannot be loaded due to bad password hash. This closes [#24126](https://github.com/ClickHouse/ClickHouse/issues/24126). [#31557](https://github.com/ClickHouse/ClickHouse/pull/31557) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Use shard and replica name from `Replicated` database arguments when expanding macros in `ReplicatedMergeTree` arguments if these macros are not defined in config. Closes [#31471](https://github.com/ClickHouse/ClickHouse/issues/31471). [#31488](https://github.com/ClickHouse/ClickHouse/pull/31488) ([tavplubix](https://github.com/tavplubix)).
+* Better analysis for `min/max/count` projection. Now, with enabled `allow_experimental_projection_optimization`, virtual `min/max/count` projection can be used together with columns from partition key. [#31474](https://github.com/ClickHouse/ClickHouse/pull/31474) ([Amos Bird](https://github.com/amosbird)).
+* Add `--pager` support for `clickhouse-local`. [#31457](https://github.com/ClickHouse/ClickHouse/pull/31457) ([Azat Khuzhin](https://github.com/azat)).
+* Fix waiting of the editor during interactive query edition (`waitpid()` returns -1 on `SIGWINCH` and `EDITOR` and `clickhouse-local`/`clickhouse-client` works concurrently). [#31456](https://github.com/ClickHouse/ClickHouse/pull/31456) ([Azat Khuzhin](https://github.com/azat)).
+* Throw an exception if there is some garbage after field in `JSONCompactStrings(EachRow)` format. [#31455](https://github.com/ClickHouse/ClickHouse/pull/31455) ([Kruglov Pavel](https://github.com/Avogar)).
+* Default value of `http_send_timeout` and `http_receive_timeout` settings changed from 1800 (30 minutes) to 180 (3 minutes). [#31450](https://github.com/ClickHouse/ClickHouse/pull/31450) ([tavplubix](https://github.com/tavplubix)).
+* `MaterializedMySQL` now handles `CREATE TABLE ... LIKE ...` DDL queries. [#31410](https://github.com/ClickHouse/ClickHouse/pull/31410) ([Stig Bakken](https://github.com/stigsb)).
+* Return artificial create query when executing `show create table` on system's tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)).
+* Previously progress was shown only for `numbers` table function. Now for `numbers_mt` it is also shown. [#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Initial user's roles are used now to find row policies, see [#31080](https://github.com/ClickHouse/ClickHouse/issues/31080). [#31262](https://github.com/ClickHouse/ClickHouse/pull/31262) ([Vitaly Baranov](https://github.com/vitlibar)).
+* If some obsolete setting is changed - show warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([tavplubix](https://github.com/tavplubix)).
+* Improved backoff for background cleanup tasks in `MergeTree`. Settings `merge_tree_clear_old_temporary_directories_interval_seconds` and `merge_tree_clear_old_parts_interval_seconds` moved from users settings to merge tree settings. [#31180](https://github.com/ClickHouse/ClickHouse/pull/31180) ([tavplubix](https://github.com/tavplubix)).
+* Now every replica will send to client only incremental information about profile events counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). This makes `--hardware_utilization` option in `clickhouse-client` usable.
+* Enable multiline editing in clickhouse-client by default. This addresses [#31121](https://github.com/ClickHouse/ClickHouse/issues/31121). [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123) ([Amos Bird](https://github.com/amosbird)).
+* Function name normalization for `ALTER` queries. This helps avoid metadata mismatch between creating table with indices/projections and adding indices/projections via alter commands. This is a follow-up PR of https://github.com/ClickHouse/ClickHouse/pull/20174. Marked as an improvement as there are no bug reports and the scenario is somewhat rare. [#31095](https://github.com/ClickHouse/ClickHouse/pull/31095) ([Amos Bird](https://github.com/amosbird)).
+* Support `IF EXISTS` modifier for `RENAME DATABASE`/`TABLE`/`DICTIONARY` query. If this directive is used, one will not get an error if the DATABASE/TABLE/DICTIONARY to be renamed doesn't exist. [#31081](https://github.com/ClickHouse/ClickHouse/pull/31081) ([victorgao](https://github.com/kafka1991)).
+* Cancel vertical merges when partition is dropped. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/25684 and https://github.com/ClickHouse/ClickHouse/pull/30996. [#31057](https://github.com/ClickHouse/ClickHouse/pull/31057) ([Amos Bird](https://github.com/amosbird)).
+* The local session inside a Clickhouse dictionary source won't send its events to the session log anymore. This fixes a possible deadlock (tsan alert) on shutdown. Also this PR fixes flaky `test_dictionaries_dependency_xml/`. [#31013](https://github.com/ClickHouse/ClickHouse/pull/31013) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Less locking in ALTER command. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)).
+* Fix `--verbose` option in clickhouse-local interactive mode and allow logging into file. [#30881](https://github.com/ClickHouse/ClickHouse/pull/30881) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added `\l`, `\d`, `\c` commands in `clickhouse-client` like in MySQL and PostgreSQL. [#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)).
+* For clickhouse-local or clickhouse-client: if there is `--interactive` option with `--query` or `--queries-file`, then first execute them like in non-interactive and then start interactive mode. [#30851](https://github.com/ClickHouse/ClickHouse/pull/30851) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error (if DROP fails during removing znodes from zookeeper). [#30826](https://github.com/ClickHouse/ClickHouse/pull/30826) ([Azat Khuzhin](https://github.com/azat)).
+* Avro format works against Kafka. Setting `output_format_avro_rows_in_file` added. [#30351](https://github.com/ClickHouse/ClickHouse/pull/30351) ([Ilya Golshtein](https://github.com/ilejn)).
+* Allow to specify one or any number of PostgreSQL schemas for one `MaterializedPostgreSQL` database. Closes [#28901](https://github.com/ClickHouse/ClickHouse/issues/28901). Closes [#29324](https://github.com/ClickHouse/ClickHouse/issues/29324). [#28933](https://github.com/ClickHouse/ClickHouse/pull/28933) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Replaced default ports for clickhouse-keeper internal communication from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). [#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)).
+* Implement function transform with Decimal arguments. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([李帅](https://github.com/loneylee)).
+* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Bug Fixes
+
+* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix bug when removing unneeded columns in subquery. If there is an aggregation function in a query without group by, do not remove it even if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)).
+* Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)).
+* Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)).
+* Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when data type of default expression is not equal to the data type of column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([tavplubix](https://github.com/tavplubix)).
+* Server might fail to start with `Cannot attach 1 tables due to cyclic dependencies` error if `Dictionary` table looks at XML-dictionary with the same name, it's fixed. Fixes [#31315](https://github.com/ClickHouse/ClickHouse/issues/31315). [#32288](https://github.com/ClickHouse/ClickHouse/pull/32288) ([tavplubix](https://github.com/tavplubix)).
+* Fix parsing error while NaN deserializing for `Nullable(Float)` for `Quoted` escaping rule. [#32190](https://github.com/ClickHouse/ClickHouse/pull/32190) ([Kruglov Pavel](https://github.com/Avogar)).
+* XML dictionaries: identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)).
+* Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([tavplubix](https://github.com/tavplubix)).
+* Dictionaries: fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix `CAST` from `Nullable` with `cast_keep_nullable` (`PARAMETER_OUT_OF_BOUND` error before for i.e. `toUInt32OrDefault(toNullable(toUInt32(1)))`). [#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)).
+* Fix CREATE TABLE of Join Storage in some obscure cases. Close [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)).
+* Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([tavplubix](https://github.com/tavplubix)).
+* `MaterializedMySQL` (experimental feature): Fix misinterpretation of `DECIMAL` data from MySQL. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)).
+* `FileLog` (experimental feature) engine unnecessarily created a metadata directory when table creation failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)).
+* Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([tavplubix](https://github.com/tavplubix)).
+* Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)).
+* Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler`. Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)).
+* Fix invalid cast of Nullable type when nullable primary key is used. (Nullable primary key is a discouraged feature - please do not use). This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)).
+* Fix crash in recursive UDF in SQL. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix crash with empty result of ODBC query (with some ODBC drivers). Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix disabling of the query profiler (in case of `query_profiler_real_time_period_ns>0`/`query_profiler_cpu_time_period_ns>0` the query profiler could stay enabled even after the query finished). [#31740](https://github.com/ClickHouse/ClickHouse/pull/31740) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([tavplubix](https://github.com/tavplubix)).
+* Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([tavplubix](https://github.com/tavplubix)).
+* Fix exception on some of the applications of `decrypt` function on Nullable columns. This closes [#31662](https://github.com/ClickHouse/ClickHouse/issues/31662). This closes [#31426](https://github.com/ClickHouse/ClickHouse/issues/31426). [#31707](https://github.com/ClickHouse/ClickHouse/pull/31707) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed function ngrams when string contains UTF-8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)).
+* Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([tavplubix](https://github.com/tavplubix)).
+* Fixed null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* `RENAME TABLE` query worked incorrectly on attempt to rename a DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([tavplubix](https://github.com/tavplubix)).
+* Implement `sparkbar` aggregate function as it was intended, see: [#26175 (comment)](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-960353867), [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)).
+* Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Disable `partial_merge_join_left_table_buffer_bytes` until the bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)).
+* Fix progress for short `INSERT SELECT` queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)).
+* Fix wrong behavior with group by and positional arguments. Closes [#31280 (comment)](https://github.com/ClickHouse/ClickHouse/issues/31280#issuecomment-968696186). [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Remove `notLike` function from index analysis, because it was wrong. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)).
+* Fix bug in Keeper which can lead to inability to start when some coordination logs were lost and we have a fresher snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)).
+* Rewrite right distributed table in local join. Solves [#25809](https://github.com/ClickHouse/ClickHouse/issues/25809). [#31105](https://github.com/ClickHouse/ClickHouse/pull/31105) ([abel-cheng](https://github.com/abel-cheng)).
+* Fix `Merge` table with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix JSON_VALUE/JSON_QUERY with quoted identifiers. This allows to have spaces in json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Using `formatRow` function with not row-oriented formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix bug which broke select queries if they happened after dropping materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Skip `max_partition_size_to_drop` check in case of `ATTACH PARTITION ... FROM` and `MOVE PARTITION ...`. [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)).
+* Fix some corner cases with `INTERSECT` and `EXCEPT` operators. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Build/Testing/Packaging Improvement
+
+* Fix incorrect filtering result on non-x86 builds. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Make ClickHouse build fully reproducible (byte identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([alexey-milovidov](https://github.com/alexey-milovidov)). Remove filesystem path to the build directory from binaries to enable reproducible builds. This is needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Use our own CMakeLists for `zlib-ng`, `cassandra`, `mariadb-connector-c` and `xz`, `re2`, `sentry`, `gsasl`, `arrow`, `protobuf`. This is needed for [#20151](https://github.com/ClickHouse/ClickHouse/issues/20151). Part of [#9226](https://github.com/ClickHouse/ClickHouse/issues/9226). A small step towards removal of annoying trash from the build system. [#30599](https://github.com/ClickHouse/ClickHouse/pull/30599) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Hermetic builds: use fixed version of libc and make sure that no source or binary files from the host OS are used during build. This closes [#27133](https://github.com/ClickHouse/ClickHouse/issues/27133). This closes [#21435](https://github.com/ClickHouse/ClickHouse/issues/21435). This closes [#30462](https://github.com/ClickHouse/ClickHouse/issues/30462). [#30011](https://github.com/ClickHouse/ClickHouse/pull/30011) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Adding function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227). [#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)).
+* More correct setting up capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)).
+* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)).
+* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Initial support for risc-v. See development/build-cross-riscv for quirks and build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)).
+* Support compiling on ARM machines with the parameter `-DENABLE_TESTS=OFF`. [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)).
+
+
 ### ClickHouse release v21.11, 2021-11-09

 #### Backward Incompatible Change
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -424,6 +424,11 @@ if (OS_LINUX AND NOT SANITIZE)
    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
 endif ()

+# Increase stack size on Musl. We need a big stack for our recursive-descent parser.
+if (USE_MUSL)
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152")
+endif ()
+
 include(cmake/dbms_glob_sources.cmake)

 if (OS_LINUX OR OS_ANDROID)
@ -447,10 +452,15 @@ if (MAKE_STATIC_LIBRARIES)
        # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
        set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
        set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
-        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie")
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie")
    endif ()
 else ()
    set (CMAKE_POSITION_INDEPENDENT_CODE ON)
+    # This is required for clang on Arch linux, that uses PIE by default.
+    # See enable-SSP-and-PIE-by-default.patch [1].
+    #
+    #   [1]: https://github.com/archlinux/svntogit-packages/blob/6e681aa860e65ad46a1387081482eb875c2200f2/trunk/enable-SSP-and-PIE-by-default.patch
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie")
 endif ()

 if (ENABLE_TESTS)
@ -508,6 +518,7 @@ include (cmake/find/hdfs3.cmake) # uses protobuf
 include (cmake/find/poco.cmake)
 include (cmake/find/curl.cmake)
 include (cmake/find/s3.cmake)
+include (cmake/find/blob_storage.cmake)
 include (cmake/find/base64.cmake)
 include (cmake/find/parquet.cmake)
 include (cmake/find/simdjson.cmake)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -2,7 +2,13 @@

 ClickHouse is an open project, and you can contribute to it in many ways. You can help with ideas, code, or documentation. We appreciate any efforts that help us to make the project better.

-Thank you.
+Thank you!
+
+## Legal Info
+
+When you open your first pull request to the ClickHouse repo, a bot will invite you to accept the ClickHouse Individual CLA (Contributor License Agreement). It is a simple process that takes only a few clicks. For subsequent pull requests, the bot will check whether you have already signed it and won't bother you again.
+
+Optionally, to make contributions even more legally sound, your employer as a legal entity may want to sign a ClickHouse Corporate CLA with ClickHouse, Inc. If you're interested in doing so, contact us at [legal@clickhouse.com](mailto:legal@clickhouse.com).

 ## Technical Info

--- a/PreLoad.cmake
+++ b/PreLoad.cmake
@ -27,8 +27,7 @@ execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH)
 if (OS MATCHES "Linux"
    AND NOT DEFINED CMAKE_TOOLCHAIN_FILE
    AND NOT DISABLE_HERMETIC_BUILD
-    AND ($ENV{CC} MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*")
-    AND (USE_STATIC_LIBRARIES OR NOT DEFINED USE_STATIC_LIBRARIES))
+    AND ($ENV{CC} MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*"))

    if (ARCH MATCHES "amd64|x86_64")
        set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "" FORCE)
--- a/base/CMakeLists.txt
+++ b/base/CMakeLists.txt
@ -9,7 +9,3 @@ add_subdirectory (pcg-random)
 add_subdirectory (widechar_width)
 add_subdirectory (readpassphrase)
 add_subdirectory (bridge)
-
-if (USE_MYSQL)
-    add_subdirectory (mysqlxx)
-endif ()
--- a/base/base/CMakeLists.txt
+++ b/base/base/CMakeLists.txt
@ -1,8 +1,6 @@
 set (SRCS
    argsToConfig.cpp
    coverage.cpp
-    DateLUT.cpp
-    DateLUTImpl.cpp
    demangle.cpp
    getFQDNOrHostName.cpp
    getMemoryAmount.cpp
@ -18,14 +16,11 @@ set (SRCS
    sleep.cpp
    terminalColors.cpp
    errnoToString.cpp
-    getResource.cpp
    StringRef.cpp
 )

 if (ENABLE_REPLXX)
    list (APPEND SRCS ReplxxLineReader.cpp)
-elseif (ENABLE_READLINE)
-    list (APPEND SRCS ReadlineLineReader.cpp)
 endif ()

 if (USE_DEBUG_HELPERS)
@ -52,28 +47,6 @@ if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES)
    target_link_libraries(common PUBLIC -Wl,-U,_inside_main)
 endif()

-# Allow explicit fallback to readline
-if (NOT ENABLE_REPLXX AND ENABLE_READLINE)
-    message (STATUS "Attempt to fallback to readline explicitly")
-    set (READLINE_PATHS "/usr/local/opt/readline/lib")
-    # First try find custom lib for macos users (default lib without history support)
-    find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS} NO_DEFAULT_PATH)
-    if (NOT READLINE_LIB)
-        find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS})
-    endif ()
-
-    set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include")
-    find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS} NO_DEFAULT_PATH)
-    if (NOT READLINE_INCLUDE_DIR)
-        find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS})
-    endif ()
-    if (READLINE_INCLUDE_DIR AND READLINE_LIB)
-        target_link_libraries(common PUBLIC ${READLINE_LIB})
-        target_compile_definitions(common PUBLIC USE_READLINE=1)
-        message (STATUS "Using readline: ${READLINE_INCLUDE_DIR} : ${READLINE_LIB}")
-    endif ()
-endif ()
-
 target_link_libraries (common
    PUBLIC
        ${CITYHASH_LIBRARIES}
--- a/base/base/LineReader.cpp
+++ b/base/base/LineReader.cpp
@ -10,16 +10,6 @@
 #include <sys/types.h>


-#ifdef OS_LINUX
-/// We can detect if code is linked with one or another readline variants or open the library dynamically.
-#   include <dlfcn.h>
-extern "C"
-{
-    char * readline(const char *) __attribute__((__weak__));
-    char * (*readline_ptr)(const char *) = readline;
-}
-#endif
-
 #ifdef HAS_RESERVED_IDENTIFIER
 #pragma clang diagnostic ignored "-Wreserved-identifier"
 #endif
@ -152,33 +142,6 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt)
 {
    input.clear();

-#ifdef OS_LINUX
-    if (!readline_ptr)
-    {
-        for (const auto * name : {"libreadline.so", "libreadline.so.0", "libeditline.so", "libeditline.so.0"})
-        {
-            void * dl_handle = dlopen(name, RTLD_LAZY);
-            if (dl_handle)
-            {
-                readline_ptr = reinterpret_cast<char * (*)(const char *)>(dlsym(dl_handle, "readline"));
-                if (readline_ptr)
-                {
-                    break;
-                }
-            }
-        }
-    }
-
-    /// Minimal support for readline
-    if (readline_ptr)
-    {
-        char * line_read = (*readline_ptr)(prompt.c_str());
-        if (!line_read)
-            return ABORT;
-        input = line_read;
-    }
-    else
-#endif
    {
        std::cout << prompt;
        std::getline(std::cin, input);
--- a/base/base/ReadlineLineReader.cpp
+++ b/base/base/ReadlineLineReader.cpp
@ -1,187 +0,0 @@
-#include <base/ReadlineLineReader.h>
-#include <base/errnoToString.h>
-#include <base/scope_guard.h>
-
-#include <errno.h>
-#include <signal.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <iostream>
-
-namespace
-{
-
-/// Trim ending whitespace inplace
-void trim(String & s)
-{
-    s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
-}
-
-}
-
-static const LineReader::Suggest * suggest;
-
-/// Points to current word to suggest.
-static LineReader::Suggest::Words::const_iterator pos;
-/// Points after the last possible match.
-static LineReader::Suggest::Words::const_iterator end;
-
-/// Set iterators to the matched range of words if any.
-static void findRange(const char * prefix, size_t prefix_length)
-{
-    std::string prefix_str(prefix);
-    if (auto completions = suggest->getCompletions(prefix_str, prefix_length))
-        std::tie(pos, end) = *completions;
-}
-
-/// Iterates through matched range.
-static char * nextMatch()
-{
-    if (pos >= end)
-        return nullptr;
-
-    /// readline will free memory by itself.
-    char * word = strdup(pos->c_str());
-    ++pos;
-    return word;
-}
-
-static char * generate(const char * text, int state)
-{
-    if (!suggest->ready)
-        return nullptr;
-    if (state == 0)
-        findRange(text, strlen(text));
-
-    /// Do not append whitespace after word. For unknown reason, rl_completion_append_character = '\0' does not work.
-    rl_completion_suppress_append = 1;
-
-    return nextMatch();
-};
-
-ReadlineLineReader::ReadlineLineReader(
-    const Suggest & suggest_, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
-    : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_))
-{
-    suggest = &suggest_;
-
-    if (!history_file_path.empty())
-    {
-        int res = read_history(history_file_path.c_str());
-        if (res)
-            std::cerr << "Cannot read history from file " + history_file_path + ": "+ errnoToString(errno) << std::endl;
-    }
-
-    /// Added '.' to the default list. Because it is used to separate database and table.
-    rl_basic_word_break_characters = word_break_characters;
-
-    /// Not append whitespace after single suggestion. Because whitespace after function name is meaningless.
-    rl_completion_append_character = '\0';
-
-    rl_completion_entry_function = generate;
-
-    /// Install Ctrl+C signal handler that will be used in interactive mode.
-
-    if (rl_initialize())
-        throw std::runtime_error("Cannot initialize readline");
-
-    auto clear_prompt_or_exit = [](int)
-    {
-        /// This is signal safe.
-        ssize_t res = write(STDOUT_FILENO, "\n", 1);
-
-        /// Allow to quit client while query is in progress by pressing Ctrl+C twice.
-        /// (First press to Ctrl+C will try to cancel query by InterruptListener).
-        if (res == 1 && rl_line_buffer[0] && !RL_ISSTATE(RL_STATE_DONE))
-        {
-            rl_replace_line("", 0);
-            if (rl_forced_update_display())
-                _exit(0);
-        }
-        else
-        {
-            /// A little dirty, but we struggle to find better way to correctly
-            /// force readline to exit after returning from the signal handler.
-            _exit(0);
-        }
-    };
-
-    if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR)
-        throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + errnoToString(errno));
-
-    rl_variable_bind("completion-ignore-case", "on");
-    // TODO: it doesn't work
-    // history_write_timestamps = 1;
-}
-
-ReadlineLineReader::~ReadlineLineReader()
-{
-}
-
-LineReader::InputStatus ReadlineLineReader::readOneLine(const String & prompt)
-{
-    input.clear();
-
-    const char* cinput = readline(prompt.c_str());
-    if (cinput == nullptr)
-        return (errno != EAGAIN) ? ABORT : RESET_LINE;
-    input = cinput;
-
-    trim(input);
-    return INPUT_LINE;
-}
-
-void ReadlineLineReader::addToHistory(const String & line)
-{
-    add_history(line.c_str());
-
-    // Flush changes to the disk
-    // NOTE readline builds a buffer of all the lines to write, and write them in one syscall.
-    // Thus there is no need to lock the history file here.
-    write_history(history_file_path.c_str());
-}
-
-#if RL_VERSION_MAJOR >= 7
-
-#define BRACK_PASTE_PREF "\033[200~"
-#define BRACK_PASTE_SUFF "\033[201~"
-
-#define BRACK_PASTE_LAST '~'
-#define BRACK_PASTE_SLEN 6
-
-/// This handler bypasses some unused macro/event checkings and remove trailing newlines before insertion.
-static int clickhouse_rl_bracketed_paste_begin(int /* count */, int /* key */)
-{
-    std::string buf;
-    buf.reserve(128);
-
-    RL_SETSTATE(RL_STATE_MOREINPUT);
-    SCOPE_EXIT(RL_UNSETSTATE(RL_STATE_MOREINPUT));
-    int c;
-    while ((c = rl_read_key()) >= 0)
-    {
-        if (c == '\r')
-            c = '\n';
-        buf.push_back(c);
-        if (buf.size() >= BRACK_PASTE_SLEN && c == BRACK_PASTE_LAST && buf.substr(buf.size() - BRACK_PASTE_SLEN) == BRACK_PASTE_SUFF)
-        {
-            buf.resize(buf.size() - BRACK_PASTE_SLEN);
-            break;
-        }
-    }
-    trim(buf);
-    return static_cast<size_t>(rl_insert_text(buf.c_str())) == buf.size() ? 0 : 1;
-}
-
-#endif
-
-void ReadlineLineReader::enableBracketedPaste()
-{
-#if RL_VERSION_MAJOR >= 7
-    rl_variable_bind("enable-bracketed-paste", "on");
-
-    /// Use our bracketed paste handler to get better user experience. See comments above.
-    rl_bind_keyseq(BRACK_PASTE_PREF, clickhouse_rl_bracketed_paste_begin);
-#endif
-};
--- a/base/base/ReadlineLineReader.h
+++ b/base/base/ReadlineLineReader.h
@ -1,19 +0,0 @@
-#pragma once
-
-#include "LineReader.h"
-
-#include <readline/readline.h>
-#include <readline/history.h>
-
-class ReadlineLineReader : public LineReader
-{
-public:
-    ReadlineLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_);
-    ~ReadlineLineReader() override;
-
-    void enableBracketedPaste() override;
-
-private:
-    InputStatus readOneLine(const String & prompt) override;
-    void addToHistory(const String & line) override;
-};
--- a/base/base/getPageSize.cpp
+++ b/base/base/getPageSize.cpp
@ -1,8 +1,11 @@
 #include <base/getPageSize.h>
 #include <unistd.h>
-
+#include <cstdlib>

 Int64 getPageSize()
 {
-    return sysconf(_SC_PAGESIZE);
+    Int64 page_size = sysconf(_SC_PAGESIZE);
+    if (page_size < 0)
+        abort();
+    return page_size;
 }
--- a/base/base/phdr_cache.cpp
+++ b/base/base/phdr_cache.cpp
@ -123,6 +123,12 @@ bool hasPHDRCache()
 #else

 void updatePHDRCache() {}
-bool hasPHDRCache() { return false; }
+
+#if defined(USE_MUSL)
+    /// When statically linked with musl, dl_iterate_phdr is immutable.
+    bool hasPHDRCache() { return true; }
+#else
+    bool hasPHDRCache() { return false; }
+#endif

 #endif
--- a/base/harmful/harmful.c
+++ b/base/harmful/harmful.c
@ -182,7 +182,6 @@ TRAP(vlimit)
 TRAP(wcsnrtombs)
 TRAP(wcsrtombs)
 TRAP(wctomb)
-TRAP(wordexp)
 TRAP(basename)
 TRAP(catgets)
 TRAP(dbm_clearerr)
@ -195,9 +194,8 @@ TRAP(dbm_nextkey)
 TRAP(dbm_open)
 TRAP(dbm_store)
 TRAP(dirname)
-#if !defined(SANITIZER)
-TRAP(dlerror) // Used by tsan
-#endif
+// TRAP(dlerror) // It is not thread-safe. But it is used by dynamic linker to load some name resolution plugins. Also used by TSan.
+/// Note: we should better get rid of glibc, dynamic linking and all that sort of annoying garbage altogether.
 TRAP(ftw)
 TRAP(getc_unlocked)
 //TRAP(getenv) // Ok at program startup
@ -245,4 +243,21 @@ TRAP(lgammaf32x)
 TRAP(lgammaf64)
 TRAP(lgammaf64x)

+/// These functions are unused by ClickHouse and we should be aware if they accidentally get used.
+/// Sometimes people report that these functions contain vulnerabilities (these reports are bogus for ClickHouse).
+TRAP(mq_close)
+TRAP(mq_getattr)
+TRAP(mq_setattr)
+TRAP(mq_notify)
+TRAP(mq_open)
+TRAP(mq_receive)
+TRAP(mq_send)
+TRAP(mq_unlink)
+TRAP(mq_timedsend)
+TRAP(mq_timedreceive)
+
+/// These functions are also unused by ClickHouse.
+TRAP(wordexp)
+TRAP(wordfree)
+
 #endif
--- a/base/mysqlxx/CMakeLists.txt
+++ b/base/mysqlxx/CMakeLists.txt
@ -1,61 +0,0 @@
-add_library (mysqlxx
-    Connection.cpp
-    Exception.cpp
-    Query.cpp
-    ResultBase.cpp
-    UseQueryResult.cpp
-    Row.cpp
-    Value.cpp
-    Pool.cpp
-    PoolFactory.cpp
-    PoolWithFailover.cpp
-)
-
-target_include_directories (mysqlxx PUBLIC ..)
-
-if (NOT USE_INTERNAL_MYSQL_LIBRARY)
-    set(PLATFORM_LIBRARIES ${CMAKE_DL_LIBS})
-
-    if (USE_MYSQL)
-        target_include_directories (mysqlxx SYSTEM PRIVATE ${MYSQL_INCLUDE_DIR})
-    endif ()
-
-    if (APPLE)
-        find_library (ICONV_LIBRARY iconv)
-        set (MYSQLCLIENT_LIBRARIES ${MYSQLCLIENT_LIBRARIES} ${STATIC_MYSQLCLIENT_LIB} ${ICONV_LIBRARY})
-    elseif (USE_STATIC_LIBRARIES AND STATIC_MYSQLCLIENT_LIB)
-        set (MYSQLCLIENT_LIBRARIES ${STATIC_MYSQLCLIENT_LIB})
-    endif ()
-endif ()
-
-target_link_libraries (mysqlxx
-    PUBLIC
-        common
-    PRIVATE
-        ${MYSQLCLIENT_LIBRARIES}
-        ${ZLIB_LIBRARIES}
-)
-
-if(OPENSSL_LIBRARIES)
-    target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES})
-endif()
-
-target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES})
-
-if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR)
-    target_include_directories (mysqlxx SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR})
-endif ()
-
-target_no_warning(mysqlxx reserved-macro-identifier)
-
-if (NOT USE_INTERNAL_MYSQL_LIBRARY AND USE_STATIC_LIBRARIES)
-   message(WARNING "Statically linking with system mysql/mariadb only works "
-           "if mysql client libraries are built with same openssl version as "
-           "we are going to use now. It wouldn't work if GnuTLS is used. "
-           "Try -D\"USE_INTERNAL_MYSQL_LIBRARY\"=ON or -D\"ENABLE_MYSQL\"=OFF or "
-           "-D\"USE_STATIC_LIBRARIES\"=OFF")
-endif ()
-
-if (ENABLE_TESTS)
-    add_subdirectory (tests)
-endif ()
--- a/cmake/find/blob_storage.cmake
+++ b/cmake/find/blob_storage.cmake
@ -0,0 +1,30 @@
+option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
+
+option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
+    "Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
+    ON)
+
+if (ENABLE_AZURE_BLOB_STORAGE)
+    set(USE_AZURE_BLOB_STORAGE 1)
+    set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
+endif()
+
+if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
+        OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
+        AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
+    set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
+    set(USE_AZURE_BLOB_STORAGE 0)
+endif ()
+
+if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
+endif()
+
+if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
+endif()
+
+if (USE_AZURE_BLOB_STORAGE)
+    message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")
+endif()
--- a/cmake/find/ccache.cmake
+++ b/cmake/find/ccache.cmake
@ -32,11 +32,6 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
   if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
      message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")

-      set (CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_CXX_COMPILER_LAUNCHER})
-      set (CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_C_COMPILER_LAUNCHER})
-
-      set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
-
      # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
      # filled from the debian/changelog or current time.
      #
@ -49,11 +44,14 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
      # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
      if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
         message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
+         set(LAUNCHER ${CCACHE_FOUND})
      elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
         message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
-         set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH")
-         set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH")
+         set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND})
      endif()
+
+      set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER})
+      set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER})
   else ()
      message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118")
   endif ()
--- a/cmake/linux/toolchain-x86_64.cmake
+++ b/cmake/linux/toolchain-x86_64.cmake
@ -14,9 +14,12 @@ set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_6

 set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc")

-set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
-set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
-set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

 set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
 set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@ -42,6 +42,14 @@ if (CMAKE_CROSSCOMPILING)
        message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
    endif ()

+    if (USE_MUSL)
+        set (USE_SENTRY OFF CACHE INTERNAL "")
+        set (ENABLE_ODBC OFF CACHE INTERNAL "")
+        set (ENABLE_GRPC OFF CACHE INTERNAL "")
+        set (ENABLE_HDFS OFF CACHE INTERNAL "")
+        set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "")
+    endif ()
+
    # Don't know why but CXX_STANDARD doesn't work for cross-compilation
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20")

--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@ -91,6 +91,9 @@ endif ()
 if (LINKER_NAME)
    if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0))
        find_program (LLD_PATH NAMES ${LINKER_NAME})
+        if (NOT LLD_PATH)
+            message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
+        endif ()
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}")
    else ()
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -248,6 +248,10 @@ endif()
 # - sentry-native
 add_subdirectory (curl-cmake)

+if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    add_subdirectory(azure-cmake)
+endif()
+
 if (USE_SENTRY)
    add_subdirectory (sentry-native-cmake)
 endif()
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@ -1 +1 @@
-Subproject commit d10351f312c1ae1ca3fdda433693dfbef3acfece
+Subproject commit ff100a8713146e1ca4b4158dd6cc4eef9af47fc3
--- a/contrib/abseil-cpp
+++ b/contrib/abseil-cpp
@ -1 +1 @@
-Subproject commit b004a8a02418b83de8b686caa0b0f6e39ac2191f
+Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf
--- a/contrib/abseil-cpp-cmake/CMakeLists.txt
+++ b/contrib/abseil-cpp-cmake/CMakeLists.txt
@ -2,6 +2,8 @@ set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
 if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt")  # abseil submodule is not checked out
   message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
 endif()
+set(BUILD_TESTING OFF)
+set(ABSL_PROPAGATE_CXX_STD ON)
 add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")

 add_library(abseil_swiss_tables INTERFACE)
--- a/contrib/arrow
+++ b/contrib/arrow
@ -1 +1 @@
-Subproject commit 078e21bad344747b7656ef2d7a4f7410a0a303eb
+Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@ -1,5 +1,22 @@
 set (CMAKE_CXX_STANDARD 17)

+set(ARROW_VERSION "6.0.1")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
+
+set(ARROW_VERSION_MAJOR "6")
+set(ARROW_VERSION_MINOR "0")
+set(ARROW_VERSION_PATCH "1")
+
+if(ARROW_VERSION_MAJOR STREQUAL "0")
+    # Arrow 0.x.y => SO version is "x", full SO version is "x.y.0"
+    set(ARROW_SO_VERSION "${ARROW_VERSION_MINOR}")
+    set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0")
+else()
+    # Arrow 1.x.y => SO version is "10x", full SO version is "10x.y.0"
+    math(EXPR ARROW_SO_VERSION "${ARROW_VERSION_MAJOR} * 100 + ${ARROW_VERSION_MINOR}")
+    set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0")
+endif()
+
 # === thrift

 set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp")
@ -93,6 +110,9 @@ add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")

 message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}")

+# ===  hdfs
+set(HDFS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/")
+
 # The arrow-cmake file calls an ORC CMake subroutine that detects certain compiler features.
 # The Apple Clang compiler fails to compile that detection code unless the C++11 standard is specified.
 # As a result, these compiler features are detected as absent, and ORC itself then fails to compile.
@ -114,6 +134,7 @@ configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DI
 configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/Adaptor.hh")


+# ARROW_ORC + adapters/orc/CMakefiles
 set(ORC_SRCS
        "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
        "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
@ -150,28 +171,8 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow")

 configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cpp/src/arrow/util/config.h")

-# arrow/cpp/src/arrow/CMakeLists.txt
+# arrow/cpp/src/arrow/CMakeLists.txt (ARROW_SRCS + ARROW_COMPUTE + ARROW_IPC)
 set(ARROW_SRCS
-        "${LIBRARY_DIR}/buffer.cc"
-        "${LIBRARY_DIR}/builder.cc"
-        "${LIBRARY_DIR}/chunked_array.cc"
-        "${LIBRARY_DIR}/compare.cc"
-        "${LIBRARY_DIR}/datum.cc"
-        "${LIBRARY_DIR}/device.cc"
-        "${LIBRARY_DIR}/extension_type.cc"
-        "${LIBRARY_DIR}/memory_pool.cc"
-        "${LIBRARY_DIR}/pretty_print.cc"
-        "${LIBRARY_DIR}/record_batch.cc"
-        "${LIBRARY_DIR}/result.cc"
-        "${LIBRARY_DIR}/scalar.cc"
-        "${LIBRARY_DIR}/sparse_tensor.cc"
-        "${LIBRARY_DIR}/status.cc"
-        "${LIBRARY_DIR}/table_builder.cc"
-        "${LIBRARY_DIR}/table.cc"
-        "${LIBRARY_DIR}/tensor.cc"
-        "${LIBRARY_DIR}/type.cc"
-        "${LIBRARY_DIR}/visitor.cc"
-
        "${LIBRARY_DIR}/array/array_base.cc"
        "${LIBRARY_DIR}/array/array_binary.cc"
        "${LIBRARY_DIR}/array/array_decimal.cc"
@ -191,25 +192,112 @@ set(ARROW_SRCS
        "${LIBRARY_DIR}/array/diff.cc"
        "${LIBRARY_DIR}/array/util.cc"
        "${LIBRARY_DIR}/array/validate.cc"
+        "${LIBRARY_DIR}/builder.cc"
+        "${LIBRARY_DIR}/buffer.cc"
+        "${LIBRARY_DIR}/chunked_array.cc"
+        "${LIBRARY_DIR}/compare.cc"
+        "${LIBRARY_DIR}/config.cc"
+        "${LIBRARY_DIR}/datum.cc"
+        "${LIBRARY_DIR}/device.cc"
+        "${LIBRARY_DIR}/extension_type.cc"
+        "${LIBRARY_DIR}/memory_pool.cc"
+        "${LIBRARY_DIR}/pretty_print.cc"
+        "${LIBRARY_DIR}/record_batch.cc"
+        "${LIBRARY_DIR}/result.cc"
+        "${LIBRARY_DIR}/scalar.cc"
+        "${LIBRARY_DIR}/sparse_tensor.cc"
+        "${LIBRARY_DIR}/status.cc"
+        "${LIBRARY_DIR}/table.cc"
+        "${LIBRARY_DIR}/table_builder.cc"
+        "${LIBRARY_DIR}/tensor.cc"
+        "${LIBRARY_DIR}/tensor/coo_converter.cc"
+        "${LIBRARY_DIR}/tensor/csf_converter.cc"
+        "${LIBRARY_DIR}/tensor/csx_converter.cc"
+        "${LIBRARY_DIR}/type.cc"
+        "${LIBRARY_DIR}/visitor.cc"
+        "${LIBRARY_DIR}/c/bridge.cc"
+        "${LIBRARY_DIR}/io/buffered.cc"
+        "${LIBRARY_DIR}/io/caching.cc"
+        "${LIBRARY_DIR}/io/compressed.cc"
+        "${LIBRARY_DIR}/io/file.cc"
+        "${LIBRARY_DIR}/io/hdfs.cc"
+        "${LIBRARY_DIR}/io/hdfs_internal.cc"
+        "${LIBRARY_DIR}/io/interfaces.cc"
+        "${LIBRARY_DIR}/io/memory.cc"
+        "${LIBRARY_DIR}/io/slow.cc"
+        "${LIBRARY_DIR}/io/stdio.cc"
+        "${LIBRARY_DIR}/io/transform.cc"
+        "${LIBRARY_DIR}/util/async_util.cc"
+        "${LIBRARY_DIR}/util/basic_decimal.cc"
+        "${LIBRARY_DIR}/util/bit_block_counter.cc"
+        "${LIBRARY_DIR}/util/bit_run_reader.cc"
+        "${LIBRARY_DIR}/util/bit_util.cc"
+        "${LIBRARY_DIR}/util/bitmap.cc"
+        "${LIBRARY_DIR}/util/bitmap_builders.cc"
+        "${LIBRARY_DIR}/util/bitmap_ops.cc"
+        "${LIBRARY_DIR}/util/bpacking.cc"
+        "${LIBRARY_DIR}/util/cancel.cc"
+        "${LIBRARY_DIR}/util/compression.cc"
+        "${LIBRARY_DIR}/util/counting_semaphore.cc"
+        "${LIBRARY_DIR}/util/cpu_info.cc"
+        "${LIBRARY_DIR}/util/decimal.cc"
+        "${LIBRARY_DIR}/util/delimiting.cc"
+        "${LIBRARY_DIR}/util/formatting.cc"
+        "${LIBRARY_DIR}/util/future.cc"
+        "${LIBRARY_DIR}/util/int_util.cc"
+        "${LIBRARY_DIR}/util/io_util.cc"
+        "${LIBRARY_DIR}/util/logging.cc"
+        "${LIBRARY_DIR}/util/key_value_metadata.cc"
+        "${LIBRARY_DIR}/util/memory.cc"
+        "${LIBRARY_DIR}/util/mutex.cc"
+        "${LIBRARY_DIR}/util/string.cc"
+        "${LIBRARY_DIR}/util/string_builder.cc"
+        "${LIBRARY_DIR}/util/task_group.cc"
+        "${LIBRARY_DIR}/util/tdigest.cc"
+        "${LIBRARY_DIR}/util/thread_pool.cc"
+        "${LIBRARY_DIR}/util/time.cc"
+        "${LIBRARY_DIR}/util/trie.cc"
+        "${LIBRARY_DIR}/util/unreachable.cc"
+        "${LIBRARY_DIR}/util/uri.cc"
+        "${LIBRARY_DIR}/util/utf8.cc"
+        "${LIBRARY_DIR}/util/value_parsing.cc"
+        "${LIBRARY_DIR}/vendored/base64.cpp"
+        "${LIBRARY_DIR}/vendored/datetime/tz.cpp"
+
+        "${LIBRARY_DIR}/vendored/musl/strptime.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriCommon.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriCompare.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriEscape.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriFile.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriIp4Base.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriIp4.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriMemory.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriNormalizeBase.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriNormalize.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriParseBase.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriParse.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriQuery.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriRecompose.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriResolve.c"
+        "${LIBRARY_DIR}/vendored/uriparser/UriShorten.c"

        "${LIBRARY_DIR}/compute/api_aggregate.cc"
        "${LIBRARY_DIR}/compute/api_scalar.cc"
        "${LIBRARY_DIR}/compute/api_vector.cc"
        "${LIBRARY_DIR}/compute/cast.cc"
        "${LIBRARY_DIR}/compute/exec.cc"
+        "${LIBRARY_DIR}/compute/exec/aggregate_node.cc"
+        "${LIBRARY_DIR}/compute/exec/exec_plan.cc"
+        "${LIBRARY_DIR}/compute/exec/expression.cc"
+        "${LIBRARY_DIR}/compute/exec/filter_node.cc"
+        "${LIBRARY_DIR}/compute/exec/project_node.cc"
+        "${LIBRARY_DIR}/compute/exec/source_node.cc"
+        "${LIBRARY_DIR}/compute/exec/sink_node.cc"
+        "${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
        "${LIBRARY_DIR}/compute/function.cc"
        "${LIBRARY_DIR}/compute/function_internal.cc"
        "${LIBRARY_DIR}/compute/kernel.cc"
        "${LIBRARY_DIR}/compute/registry.cc"
-
-        "${LIBRARY_DIR}/compute/exec/exec_plan.cc"
-        "${LIBRARY_DIR}/compute/exec/expression.cc"
-        "${LIBRARY_DIR}/compute/exec/key_compare.cc"
-        "${LIBRARY_DIR}/compute/exec/key_encode.cc"
-        "${LIBRARY_DIR}/compute/exec/key_hash.cc"
-        "${LIBRARY_DIR}/compute/exec/key_map.cc"
-        "${LIBRARY_DIR}/compute/exec/util.cc"
-
        "${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
        "${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
        "${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
@ -227,28 +315,31 @@ set(ARROW_SRCS
        "${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
-        "${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc"
-        "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
-        "${LIBRARY_DIR}/compute/kernels/scalar_temporal.cc"
+        "${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
+        "${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
+        "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
        "${LIBRARY_DIR}/compute/kernels/util_internal.cc"
+        "${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
        "${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
        "${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
        "${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
        "${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
        "${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
-
-        "${LIBRARY_DIR}/csv/chunker.cc"
-        "${LIBRARY_DIR}/csv/column_builder.cc"
-        "${LIBRARY_DIR}/csv/column_decoder.cc"
-        "${LIBRARY_DIR}/csv/converter.cc"
-        "${LIBRARY_DIR}/csv/options.cc"
-        "${LIBRARY_DIR}/csv/parser.cc"
-        "${LIBRARY_DIR}/csv/reader.cc"
-        "${LIBRARY_DIR}/csv/writer.cc"
+        "${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
+        "${LIBRARY_DIR}/compute/exec/union_node.cc"
+        "${LIBRARY_DIR}/compute/exec/key_hash.cc"
+        "${LIBRARY_DIR}/compute/exec/key_map.cc"
+        "${LIBRARY_DIR}/compute/exec/key_compare.cc"
+        "${LIBRARY_DIR}/compute/exec/key_encode.cc"
+        "${LIBRARY_DIR}/compute/exec/util.cc"
+        "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
+        "${LIBRARY_DIR}/compute/exec/hash_join.cc"
+        "${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
+        "${LIBRARY_DIR}/compute/exec/task_util.cc"

        "${LIBRARY_DIR}/ipc/dictionary.cc"
        "${LIBRARY_DIR}/ipc/feather.cc"
@ -258,52 +349,6 @@ set(ARROW_SRCS
        "${LIBRARY_DIR}/ipc/reader.cc"
        "${LIBRARY_DIR}/ipc/writer.cc"

-        "${LIBRARY_DIR}/io/buffered.cc"
-        "${LIBRARY_DIR}/io/caching.cc"
-        "${LIBRARY_DIR}/io/compressed.cc"
-        "${LIBRARY_DIR}/io/file.cc"
-        "${LIBRARY_DIR}/io/interfaces.cc"
-        "${LIBRARY_DIR}/io/memory.cc"
-        "${LIBRARY_DIR}/io/slow.cc"
-        "${LIBRARY_DIR}/io/stdio.cc"
-        "${LIBRARY_DIR}/io/transform.cc"
-
-        "${LIBRARY_DIR}/tensor/coo_converter.cc"
-        "${LIBRARY_DIR}/tensor/csf_converter.cc"
-        "${LIBRARY_DIR}/tensor/csx_converter.cc"
-
-        "${LIBRARY_DIR}/util/basic_decimal.cc"
-        "${LIBRARY_DIR}/util/bit_block_counter.cc"
-        "${LIBRARY_DIR}/util/bit_run_reader.cc"
-        "${LIBRARY_DIR}/util/bit_util.cc"
-        "${LIBRARY_DIR}/util/bitmap_builders.cc"
-        "${LIBRARY_DIR}/util/bitmap_ops.cc"
-        "${LIBRARY_DIR}/util/bitmap.cc"
-        "${LIBRARY_DIR}/util/bpacking.cc"
-        "${LIBRARY_DIR}/util/cancel.cc"
-        "${LIBRARY_DIR}/util/compression.cc"
-        "${LIBRARY_DIR}/util/cpu_info.cc"
-        "${LIBRARY_DIR}/util/decimal.cc"
-        "${LIBRARY_DIR}/util/delimiting.cc"
-        "${LIBRARY_DIR}/util/formatting.cc"
-        "${LIBRARY_DIR}/util/future.cc"
-        "${LIBRARY_DIR}/util/int_util.cc"
-        "${LIBRARY_DIR}/util/io_util.cc"
-        "${LIBRARY_DIR}/util/key_value_metadata.cc"
-        "${LIBRARY_DIR}/util/logging.cc"
-        "${LIBRARY_DIR}/util/memory.cc"
-        "${LIBRARY_DIR}/util/mutex.cc"
-        "${LIBRARY_DIR}/util/string_builder.cc"
-        "${LIBRARY_DIR}/util/string.cc"
-        "${LIBRARY_DIR}/util/task_group.cc"
-        "${LIBRARY_DIR}/util/tdigest.cc"
-        "${LIBRARY_DIR}/util/thread_pool.cc"
-        "${LIBRARY_DIR}/util/time.cc"
-        "${LIBRARY_DIR}/util/trie.cc"
-        "${LIBRARY_DIR}/util/utf8.cc"
-        "${LIBRARY_DIR}/util/value_parsing.cc"
-
-        "${LIBRARY_DIR}/vendored/base64.cpp"
        ${ORC_SRCS}
        )

@ -373,6 +418,7 @@ target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_D
 target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR})
 target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR})
 target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${HDFS_INCLUDE_DIR})

 # === parquet

@ -446,7 +492,7 @@ set (HAVE_STRERROR_R 1)
 set (HAVE_SCHED_GET_PRIORITY_MAX 1)
 set (HAVE_SCHED_GET_PRIORITY_MIN 1)

-if (OS_LINUX)
+if (OS_LINUX AND NOT USE_MUSL)
    set (STRERROR_R_CHAR_P 1)
 endif ()

--- a/contrib/azure
+++ b/contrib/azure
@ -0,0 +1 @@
+Subproject commit ac4b763d4ca40122275f1497cbdc5451337461d9
--- a/contrib/azure-cmake/CMakeLists.txt
+++ b/contrib/azure-cmake/CMakeLists.txt
@ -0,0 +1,74 @@
+set(AZURE_DIR "${ClickHouse_SOURCE_DIR}/contrib/azure")
+set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk")
+# Per-component source lists: azure-core, azure-identity, storage-common, storage-blobs.
+file(GLOB AZURE_SDK_CORE_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.hpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/winhttp/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/io/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/private/*.hpp"
+)
+
+file(GLOB AZURE_SDK_IDENTITY_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/private/*.hpp"
+)
+
+file(GLOB AZURE_SDK_STORAGE_COMMON_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/private/*.cpp"
+)
+
+file(GLOB AZURE_SDK_STORAGE_BLOBS_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.hpp"
+)
+# The lists above are already expanded paths, so concatenate them instead of globbing again.
+set(AZURE_SDK_UNIFIED_SRC
+    ${AZURE_SDK_CORE_SRC}
+    ${AZURE_SDK_IDENTITY_SRC}
+    ${AZURE_SDK_STORAGE_COMMON_SRC}
+    ${AZURE_SDK_STORAGE_BLOBS_SRC}
+)
+# Public include roots; attached to the target as SYSTEM PUBLIC at the end of this file.
+set(AZURE_SDK_INCLUDES
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/inc/"
+    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/inc/"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/inc/"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/inc/"
+)
+
+include("${AZURE_DIR}/cmake-modules/AzureTransportAdapters.cmake")
+# All four SDK components are compiled into a single library target.
+add_library(azure_sdk ${AZURE_SDK_UNIFIED_SRC})
+# Silence the Clang warnings listed below for the SDK sources only (PRIVATE options).
+if (COMPILER_CLANG)
+    target_compile_options(azure_sdk PRIVATE
+        -Wno-deprecated-copy-dtor
+        -Wno-extra-semi
+        -Wno-suggest-destructor-override
+        -Wno-inconsistent-missing-destructor-override
+        -Wno-error=unknown-warning-option
+    )
+
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
+        target_compile_options(azure_sdk PRIVATE -Wno-reserved-identifier)
+    endif()
+endif()
+
+# Originally, on Windows azure-core is built with bcrypt and crypt32 by default
+if (OPENSSL_FOUND)
+    target_link_libraries(azure_sdk PRIVATE ${OPENSSL_LIBRARIES})
+endif()
+
+# Originally, on Windows azure-core is built with winhttp by default
+if (CURL_FOUND)
+    target_link_libraries(azure_sdk PRIVATE ${CURL_LIBRARY})
+endif()
+# libxml2 is linked unconditionally, unlike OpenSSL and curl above.
+target_link_libraries(azure_sdk PRIVATE ${LIBXML2_LIBRARIES})
+
+target_include_directories(azure_sdk SYSTEM PUBLIC ${AZURE_SDK_INCLUDES})
--- a/contrib/boost
+++ b/contrib/boost
@ -1 +1 @@
-Subproject commit fcb058e1459ac273ecfe7cdf72791cb1479115af
+Subproject commit c0807e83f2824e8dd67a15b355496a9b784cdcd5
--- a/contrib/boost-cmake/CMakeLists.txt
+++ b/contrib/boost-cmake/CMakeLists.txt
@ -1,9 +1,7 @@
 option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ON)

 if (NOT USE_INTERNAL_BOOST_LIBRARY)
-    # 1.70 like in contrib/boost
-    # 1.71 on CI
-    set(BOOST_VERSION 1.71)
+    set(BOOST_VERSION 1.78)

    find_package(Boost ${BOOST_VERSION} COMPONENTS
        system
@ -66,9 +64,11 @@ if (NOT EXTERNAL_BOOST_FOUND)

    set (SRCS_FILESYSTEM
        "${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp"
+        "${LIBRARY_DIR}/libs/filesystem/src/directory.cpp"
+        "${LIBRARY_DIR}/libs/filesystem/src/exception.cpp"
        "${LIBRARY_DIR}/libs/filesystem/src/operations.cpp"
-        "${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp"
        "${LIBRARY_DIR}/libs/filesystem/src/path.cpp"
+        "${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp"
        "${LIBRARY_DIR}/libs/filesystem/src/portability.cpp"
        "${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp"
        "${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp"
@ -126,24 +126,11 @@ if (NOT EXTERNAL_BOOST_FOUND)
    # regex

    set (SRCS_REGEX
-        "${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/cregex.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/fileiter.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/icu.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/instances.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/internals.hpp"
        "${LIBRARY_DIR}/libs/regex/src/posix_api.cpp"
        "${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp"
        "${LIBRARY_DIR}/libs/regex/src/regex.cpp"
        "${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/usinstances.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp"
        "${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp"
-        "${LIBRARY_DIR}/libs/regex/src/winstances.cpp"
    )

    add_library (_boost_regex ${SRCS_REGEX})
@ -166,7 +153,6 @@ if (NOT EXTERNAL_BOOST_FOUND)

    set (SRCS_CONTEXT
        "${LIBRARY_DIR}/libs/context/src/dummy.cpp"
-        "${LIBRARY_DIR}/libs/context/src/execution_context.cpp"
        "${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp"
    )

--- a/contrib/boringssl-cmake/CMakeLists.txt
+++ b/contrib/boringssl-cmake/CMakeLists.txt
@ -639,6 +639,7 @@ add_library(

  "${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c"
  "${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c"
+  "${BORINGSSL_SOURCE_DIR}/decrepit/bio/base64_bio.c"
 )

 add_executable(
--- a/contrib/cassandra
+++ b/contrib/cassandra
@ -1 +1 @@
-Subproject commit eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1
+Subproject commit f4a31e92a25c34c02c7291ff97c7813bc83b0e09
--- a/contrib/jemalloc
+++ b/contrib/jemalloc
@ -1 +1 @@
-Subproject commit e6891d9746143bf2cf617493d880ba5a0b9a3efd
+Subproject commit a1404807211b1612539f840b3dcb1bf38d1a269e
--- a/contrib/libuv-cmake/CMakeLists.txt
+++ b/contrib/libuv-cmake/CMakeLists.txt
@ -1,17 +1,8 @@
 # This file is a modified version of contrib/libuv/CMakeLists.txt

-include(CMakeDependentOption)
-
 set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv")
 set (BINARY_DIR "${CMAKE_BINARY_DIR}/contrib/libuv")

-
-if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
-  list(APPEND uv_cflags -fvisibility=hidden --std=gnu89)
-  list(APPEND uv_cflags -Wall -Wextra -Wstrict-prototypes)
-  list(APPEND uv_cflags -Wno-unused-parameter)
-endif()
-
 set(uv_sources
    src/fs-poll.c
    src/idna.c
@ -76,7 +67,7 @@ endif()

 if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112)
-  list(APPEND uv_libraries dl rt)
+  list(APPEND uv_libraries rt)
  list(APPEND uv_sources
       src/unix/linux-core.c
       src/unix/linux-inotify.c
--- a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h
+++ b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h
@ -268,7 +268,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
 *
 * Whether iconv support is available
 */
-#if 1
+#if 0
 #define LIBXML_ICONV_ENABLED
 #endif

--- a/contrib/mariadb-connector-c-cmake/CMakeLists.txt
+++ b/contrib/mariadb-connector-c-cmake/CMakeLists.txt
@ -236,8 +236,7 @@ set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb
 add_library(mariadbclient STATIC ${LIBMARIADB_SOURCES})
 target_link_libraries(mariadbclient ${SYSTEM_LIBS})

-target_include_directories(mariadbclient
-    PRIVATE ${CC_BINARY_DIR}/include-private
-    PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb)
+target_include_directories(mariadbclient PRIVATE ${CC_BINARY_DIR}/include-private)
+target_include_directories(mariadbclient SYSTEM PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb)

 set_target_properties(mariadbclient PROPERTIES IMPORTED_INTERFACE_LINK_LIBRARIES "${SYSTEM_LIBS}")
--- a/contrib/poco
+++ b/contrib/poco
@ -1 +1 @@
-Subproject commit 258b9ba6cd245ff88e9346f75c43464c403f329d
+Subproject commit 520a90e02e3e5cb90afeae1846d161dbc508a6f1
--- a/contrib/protobuf
+++ b/contrib/protobuf
@ -1 +1 @@
-Subproject commit c1c5d02026059f4c3cb51aaa08e82288d3e08b89
+Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2
--- a/contrib/replxx-cmake/CMakeLists.txt
+++ b/contrib/replxx-cmake/CMakeLists.txt
@ -8,7 +8,7 @@ if (NOT ENABLE_REPLXX)
    add_library(replxx INTERFACE)
    target_compile_definitions(replxx INTERFACE USE_REPLXX=0)

-    message (STATUS "Not using replxx (Beware! Runtime fallback to readline is possible!)")
+    message (STATUS "Not using replxx")
    return()
 endif()

--- a/contrib/s2geometry
+++ b/contrib/s2geometry
@ -1 +1 @@
-Subproject commit 38b7a290f927cc372218c2094602b83e35b18c05
+Subproject commit 471fe9dc931a4bb560333545186e9b5da168ac83
--- a/contrib/s2geometry-cmake/CMakeLists.txt
+++ b/contrib/s2geometry-cmake/CMakeLists.txt
@ -1,8 +1,12 @@
 set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src")

+set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
+if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt")
+    message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
+endif()
+
+
 set(S2_SRCS
-    "${S2_SOURCE_DIR}/s2/base/stringprintf.cc"
-    "${S2_SOURCE_DIR}/s2/base/strtoint.cc"
    "${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc"
    "${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc"
    "${S2_SOURCE_DIR}/s2/encoded_s2shape_index.cc"
@ -14,11 +18,14 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s1chord_angle.cc"
    "${S2_SOURCE_DIR}/s2/s1interval.cc"
    "${S2_SOURCE_DIR}/s2/s2boolean_operation.cc"
+    "${S2_SOURCE_DIR}/s2/s2buffer_operation.cc"
    "${S2_SOURCE_DIR}/s2/s2builder.cc"
    "${S2_SOURCE_DIR}/s2/s2builder_graph.cc"
    "${S2_SOURCE_DIR}/s2/s2builderutil_closed_set_normalizer.cc"
    "${S2_SOURCE_DIR}/s2/s2builderutil_find_polygon_degeneracies.cc"
+    "${S2_SOURCE_DIR}/s2/s2builderutil_get_snapped_winding_delta.cc"
    "${S2_SOURCE_DIR}/s2/s2builderutil_lax_polygon_layer.cc"
+    "${S2_SOURCE_DIR}/s2/s2builderutil_lax_polyline_layer.cc"
    "${S2_SOURCE_DIR}/s2/s2builderutil_s2point_vector_layer.cc"
    "${S2_SOURCE_DIR}/s2/s2builderutil_s2polygon_layer.cc"
    "${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_layer.cc"
@ -44,7 +51,6 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s2edge_crossings.cc"
    "${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
    "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
-    "${S2_SOURCE_DIR}/s2/s2error.cc"
    "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
@ -55,6 +61,7 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s2loop.cc"
    "${S2_SOURCE_DIR}/s2/s2loop_measures.cc"
    "${S2_SOURCE_DIR}/s2/s2measures.cc"
+    "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc"
    "${S2_SOURCE_DIR}/s2/s2metrics.cc"
    "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc"
    "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc"
@ -82,28 +89,15 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
+    "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
    "${S2_SOURCE_DIR}/s2/s2text_format.cc"
    "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
-    "${S2_SOURCE_DIR}/s2/strings/ostringstream.cc"
+    "${S2_SOURCE_DIR}/s2/s2winding_operation.cc"
    "${S2_SOURCE_DIR}/s2/strings/serialize.cc"
-    # ClickHouse doesn't use strings from abseil.
-    # So, there is no duplicate symbols.
-    "${S2_SOURCE_DIR}/s2/third_party/absl/base/dynamic_annotations.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/raw_logging.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/throw_delegate.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/numeric/int128.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/ascii.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/match.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/numbers.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_cat.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_split.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/string_view.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/strip.cc"
-    "${S2_SOURCE_DIR}/s2/third_party/absl/strings/internal/memutil.cc"
    "${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc"
    "${S2_SOURCE_DIR}/s2/util/bits/bits.cc"
    "${S2_SOURCE_DIR}/s2/util/coding/coder.cc"
@ -111,17 +105,41 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc"
    "${S2_SOURCE_DIR}/s2/util/math/mathutil.cc"
    "${S2_SOURCE_DIR}/s2/util/units/length-units.cc"
+
 )

 add_library(s2 ${S2_SRCS})
-
-set_property(TARGET s2 PROPERTY CXX_STANDARD 11)
+set_property(TARGET s2 PROPERTY CXX_STANDARD 17)

 if (OPENSSL_FOUND)
    target_link_libraries(s2 PRIVATE ${OPENSSL_LIBRARIES})
 endif()

+# Copied from contrib/s2geometry/CMakeLists
+target_link_libraries(s2 PRIVATE
+        absl::base
+        absl::btree
+        absl::config
+        absl::core_headers
+        absl::dynamic_annotations
+        absl::endian
+        absl::fixed_array
+        absl::flat_hash_map
+        absl::flat_hash_set
+        absl::hash
+        absl::inlined_vector
+        absl::int128
+        absl::log_severity
+        absl::memory
+        absl::span
+        absl::str_format
+        absl::strings
+        absl::type_traits
+        absl::utility
+        )
+
 target_include_directories(s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")
+target_include_directories(s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}")

 if(M_LIBRARY)
    target_link_libraries(s2 PRIVATE ${M_LIBRARY})
--- a/contrib/sysroot
+++ b/contrib/sysroot
@ -1 +1 @@
-Subproject commit 410845187f582c5e6692b53dddbe43efbb728734
+Subproject commit bbcac834526d90d1e764164b861be426891d1743
--- a/contrib/unixodbc-cmake/linux_x86_64/private/config.h
+++ b/contrib/unixodbc-cmake/linux_x86_64/private/config.h
@ -202,10 +202,10 @@
 #define HAVE_READDIR 1

 /* Add readline support */
-#define HAVE_READLINE 1
+/* #undef HAVE_READLINE */

 /* Define to 1 if you have the <readline/history.h> header file. */
-#define HAVE_READLINE_HISTORY_H 1
+/* #undef HAVE_READLINE_HISTORY_H */

 /* Use the scandir lib */
 /* #undef HAVE_SCANDIR */
--- a/debian/rules
+++ b/debian/rules
@ -45,6 +45,10 @@ ifdef DEB_CXX
 ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE))
        CC := $(DEB_CC)
        CXX := $(DEB_CXX)
+else ifeq (clang,$(findstring clang,$(DEB_CXX)))
+# If we crosscompile with clang, it knows what to do
+        CC := $(DEB_CC)
+        CXX := $(DEB_CXX)
 else
        CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC)
        CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX)
@ -77,10 +81,6 @@ else
    THREADS_COUNT = 1
 endif

-ifneq ($(THREADS_COUNT),)
-    THREADS_COUNT:=-j$(THREADS_COUNT)
-endif
-
 %:
 	dh $@ $(DH_FLAGS) --buildsystem=cmake

@ -89,11 +89,11 @@ override_dh_auto_configure:

 override_dh_auto_build:
 	# Fix for ninja. Do not add -O.
-	$(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
+	$(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)

 override_dh_auto_test:
 ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
-	cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V
+	cd $(BUILDDIR) && ctest -j$(THREADS_COUNT) -V
 endif

 override_dh_clean:
@ -120,7 +120,7 @@ override_dh_install:
 	dh_install --list-missing --sourcedir=$(DESTDIR)

 override_dh_auto_install:
-	env DESTDIR=$(DESTDIR) $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) install
+	env DESTDIR=$(DESTDIR) $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) install

 override_dh_shlibdeps:
 	true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency.
--- a/docker/images.json
+++ b/docker/images.json
@ -46,7 +46,6 @@
        "name": "clickhouse/stateless-test",
        "dependent": [
            "docker/test/stateful",
-            "docker/test/coverage",
            "docker/test/unit"
        ]
    },
@ -56,10 +55,6 @@
            "docker/test/stress"
        ]
    },
-    "docker/test/coverage": {
-        "name": "clickhouse/test-coverage",
-        "dependent": []
-    },
    "docker/test/unit": {
        "name": "clickhouse/unit-test",
        "dependent": []
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -24,41 +24,34 @@ RUN apt-get update \
    && apt-key add /tmp/llvm-snapshot.gpg.key \
    && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
    && echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
-        /etc/apt/sources.list
+        /etc/apt/sources.list \
+    && apt-get clean

 # initial packages
-RUN apt-get update \
-    && apt-get install \
-        bash \
-        fakeroot \
-        ccache \
-        curl \
-        software-properties-common \
-        --yes --no-install-recommends
-
 RUN apt-get update \
    && apt-get install \
        bash \
        build-essential \
        ccache \
+        clang-${LLVM_VERSION} \
+        clang-tidy-${LLVM_VERSION} \
        cmake \
        curl \
+        fakeroot \
        gdb \
        git \
        gperf \
-        clang-${LLVM_VERSION} \
-        clang-tidy-${LLVM_VERSION} \
        lld-${LLVM_VERSION} \
        llvm-${LLVM_VERSION} \
        llvm-${LLVM_VERSION}-dev \
-        libicu-dev \
-        libreadline-dev \
        moreutils \
        ninja-build \
        pigz \
        rename \
+        software-properties-common \
        tzdata \
-        --yes --no-install-recommends
+        --yes --no-install-recommends \
+    && apt-get clean

 # This symlink required by gcc to find lld compiler
 RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
@ -67,7 +60,7 @@ ENV CC=clang-${LLVM_VERSION}
 ENV CXX=clang++-${LLVM_VERSION}

 # libtapi is required to support .tbh format from recent MacOS SDKs
-RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
+RUN git clone --depth 1 https://github.com/tpoechtrager/apple-libtapi.git \
    && cd apple-libtapi \
    && INSTALLPREFIX=/cctools ./build.sh \
    && ./install.sh \
@ -75,7 +68,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
    && rm -rf apple-libtapi

 # Build and install tools for cross-linking to Darwin (x86-64)
-RUN git clone https://github.com/tpoechtrager/cctools-port.git \
+RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
    && cd cctools-port/cctools \
    && ./configure --prefix=/cctools --with-libtapi=/cctools \
        --target=x86_64-apple-darwin \
@ -84,7 +77,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
    && rm -rf cctools-port

 # Build and install tools for cross-linking to Darwin (aarch64)
-RUN git clone https://github.com/tpoechtrager/cctools-port.git \
+RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
    && cd cctools-port/cctools \
    && ./configure --prefix=/cctools --with-libtapi=/cctools \
        --target=aarch64-apple-darwin \
@ -98,7 +91,8 @@ RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacO
 # NOTE: Seems like gcc-11 is too new for ubuntu20 repository
 RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
    && apt-get update \
-    && apt-get install gcc-11 g++-11 --yes
+    && apt-get install gcc-11 g++-11 --yes \
+    && apt-get clean


 COPY build.sh /
--- a/docker/packager/deb/Dockerfile
+++ b/docker/packager/deb/Dockerfile
@ -64,8 +64,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
    && apt-get install gcc-11 g++-11 --yes


-# This symlink required by gcc to find lld compiler
-RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
+# These symlinks are required:
+# /usr/bin/ld.lld: by gcc to find lld compiler
+# /usr/bin/aarch64-linux-gnu-obj*: for debug symbols stripping
+RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld \
+    && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-strip \
+    && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-objcopy \
+    && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objdump /usr/bin/aarch64-linux-gnu-objdump
+

 COPY build.sh /

--- a/docker/packager/packager
+++ b/docker/packager/packager
@ -29,7 +29,13 @@ def pull_image(image_name):
        return False

 def build_image(image_name, filepath):
-    subprocess.check_call("docker build --network=host -t {} -f {} .".format(image_name, filepath), shell=True)
+    context = os.path.dirname(filepath)
+    subprocess.check_call(
+        "docker build --network=host -t {} -f {} {}".format(
+            image_name, filepath, context
+        ),
+        shell=True,
+    )

 def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version):
    env_part = " -e ".join(env_variables)
@ -90,6 +96,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
    elif is_cross_arm:
        cc = compiler[:-len(ARM_SUFFIX)]
        cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
+        result.append("DEB_ARCH_FLAG=-aarm64")
    elif is_cross_freebsd:
        cc = compiler[:-len(FREEBSD_SUFFIX)]
        cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake")
@ -98,6 +105,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
        cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake")
    else:
        cc = compiler
+        result.append("DEB_ARCH_FLAG=-aamd64")

    cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++')

--- a/docker/server/README.md
+++ b/docker/server/README.md
@ -17,6 +17,8 @@ $ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 clic

 By default ClickHouse will be accessible only via docker network. See the [networking section below](#networking).

+By default, the server instance started above runs as the `default` user without a password.
+
 ### connect to it from a native client
 ```bash
 $ docker run -it --rm --link some-clickhouse-server:clickhouse-server clickhouse/clickhouse-client --host clickhouse-server
--- a/docker/test/codebrowser/Dockerfile
+++ b/docker/test/codebrowser/Dockerfile
@ -6,7 +6,7 @@ FROM clickhouse/binary-builder
 ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

-RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev
+RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev

 # repo versions doesn't work correctly with C++17
 # also we push reports to s3, so we add index.html to subfolder urls
@ -23,12 +23,12 @@ ENV SOURCE_DIRECTORY=/repo_folder
 ENV BUILD_DIRECTORY=/build
 ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report
 ENV SHA=nosha
-ENV DATA="data"
+ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"

 CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
    cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-13 -DCMAKE_C_COMPILER=/usr/bin/clang-13 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
    mkdir -p $HTML_RESULT_DIRECTORY && \
    $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
    cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
-    $CODEINDEX $HTML_RESULT_DIRECTORY -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
+    $CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \
    mv $HTML_RESULT_DIRECTORY /test_output
--- a/docker/test/coverage/Dockerfile
+++ b/docker/test/coverage/Dockerfile
@ -1,18 +0,0 @@
-# docker build -t clickhouse/test-coverage .
-FROM clickhouse/stateless-test
-
-RUN apt-get update -y \
-    && env DEBIAN_FRONTEND=noninteractive \
-        apt-get install --yes --no-install-recommends \
-            cmake
-
-COPY s3downloader /s3downloader
-COPY run.sh /run.sh
-
-ENV DATASETS="hits visits"
-ENV COVERAGE_DIR=/coverage_reports
-ENV SOURCE_DIR=/build
-ENV OUTPUT_DIR=/output
-ENV IGNORE='.*contrib.*'
-
-CMD ["/bin/bash", "/run.sh"]
--- a/docker/test/coverage/run.sh
+++ b/docker/test/coverage/run.sh
@ -1,112 +0,0 @@
-#!/bin/bash
-
-kill_clickhouse () {
-    echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
-    pkill -f "clickhouse-server" 2>/dev/null
-
-
-    for _ in {1..120}
-    do
-        if ! pkill -0 -f "clickhouse-server" ; then break ; fi
-        echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
-        sleep 1
-    done
-
-    if pkill -0 -f "clickhouse-server"
-    then
-        pstree -apgT
-        jobs
-        echo "Failed to kill the ClickHouse server"  | ts '%Y-%m-%d %H:%M:%S'
-        return 1
-    fi
-}
-
-start_clickhouse () {
-    LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
-    counter=0
-    until clickhouse-client --query "SELECT 1"
-    do
-        if [ "$counter" -gt 120 ]
-        then
-            echo "Cannot start clickhouse-server"
-            cat /var/log/clickhouse-server/stdout.log
-            tail -n1000 /var/log/clickhouse-server/stderr.log
-            tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
-            break
-        fi
-        sleep 0.5
-        counter=$((counter + 1))
-    done
-}
-
-
-chmod 777 /
-
-dpkg -i package_folder/clickhouse-common-static_*.deb; \
-    dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
-    dpkg -i package_folder/clickhouse-server_*.deb;  \
-    dpkg -i package_folder/clickhouse-client_*.deb; \
-    dpkg -i package_folder/clickhouse-test_*.deb
-
-mkdir -p /var/lib/clickhouse
-mkdir -p /var/log/clickhouse-server
-chmod 777 -R /var/log/clickhouse-server/
-
-# install test configs
-/usr/share/clickhouse-test/config/install.sh
-
-start_clickhouse
-
-# shellcheck disable=SC2086 # No quotes because I want to split it into words.
-if ! /s3downloader --dataset-names $DATASETS; then
-    echo "Cannot download datatsets"
-    exit 1
-fi
-
-
-chmod 777 -R /var/lib/clickhouse
-
-
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW DATABASES"
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "CREATE DATABASE test"
-
-kill_clickhouse
-start_clickhouse
-
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
-
-LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test -j 8 --testname --shard --zookeeper --print-time 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_result.txt
-
-readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "/test_result.txt")
-
-kill_clickhouse
-
-sleep 3
-
-if [[ -n "${FAILED_TESTS[*]}" ]]
-then
-    # Clean the data so that there is no interference from the previous test run.
-    rm -rf /var/lib/clickhouse/{{meta,}data,user_files} ||:
-
-    start_clickhouse
-
-    echo "Going to run again: ${FAILED_TESTS[*]}"
-
-    LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test --order=random --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_result.txt
-else
-    echo "No failed tests"
-fi
-
-mkdir -p "$COVERAGE_DIR"
-mv /*.profraw "$COVERAGE_DIR"
-
-mkdir -p "$SOURCE_DIR"/obj-x86_64-linux-gnu
-cd "$SOURCE_DIR"/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /
-llvm-profdata-11 merge -sparse "${COVERAGE_DIR}"/* -o clickhouse.profdata
-llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex "$IGNORE" > output.lcov
-genhtml output.lcov --ignore-errors source --output-directory "${OUTPUT_DIR}"
--- a/docker/test/coverage/s3downloader
+++ b/docker/test/coverage/s3downloader
@ -1,101 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import os
-import sys
-import time
-import tarfile
-import logging
-import argparse
-import requests
-import tempfile
-
-
-DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
-
-AVAILABLE_DATASETS = {
-    'hits': 'hits_v1.tar',
-    'visits': 'visits_v1.tar',
-}
-
-RETRIES_COUNT = 5
-
-def _get_temp_file_name():
-    return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
-
-def build_url(base_url, dataset):
-    return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
-
-def dowload_with_progress(url, path):
-    logging.info("Downloading from %s to temp path %s", url, path)
-    for i in range(RETRIES_COUNT):
-        try:
-            with open(path, 'wb') as f:
-                response = requests.get(url, stream=True)
-                response.raise_for_status()
-                total_length = response.headers.get('content-length')
-                if total_length is None or int(total_length) == 0:
-                    logging.info("No content-length, will download file without progress")
-                    f.write(response.content)
-                else:
-                    dl = 0
-                    total_length = int(total_length)
-                    logging.info("Content length is %ld bytes", total_length)
-                    for data in response.iter_content(chunk_size=4096):
-                        dl += len(data)
-                        f.write(data)
-                        if sys.stdout.isatty():
-                            done = int(50 * dl / total_length)
-                            percent = int(100 * float(dl) / total_length)
-                            sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
-                            sys.stdout.flush()
-            break
-        except Exception as ex:
-            sys.stdout.write("\n")
-            time.sleep(3)
-            logging.info("Exception while downloading %s, retry %s", ex, i + 1)
-            if os.path.exists(path):
-                os.remove(path)
-    else:
-        raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
-
-    sys.stdout.write("\n")
-    logging.info("Downloading finished")
-
-def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
-    logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
-    with tarfile.open(tar_path, 'r') as comp_file:
-        comp_file.extractall(path=clickhouse_path)
-    logging.info("Unpack finished")
-
-
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-
-    parser = argparse.ArgumentParser(
-        description="Simple tool for dowloading datasets for clickhouse from S3")
-
-    parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
-    parser.add_argument('--url-prefix', default=DEFAULT_URL)
-    parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
-
-    args = parser.parse_args()
-    datasets = args.dataset_names
-    logging.info("Will fetch following datasets: %s", ', '.join(datasets))
-    for dataset in datasets:
-        logging.info("Processing %s", dataset)
-        temp_archive_path = _get_temp_file_name()
-        try:
-            download_url_for_dataset = build_url(args.url_prefix, dataset)
-            dowload_with_progress(download_url_for_dataset, temp_archive_path)
-            unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
-        except Exception as ex:
-            logging.info("Some exception occured %s", str(ex))
-            raise
-        finally:
-            logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
-            if os.path.exists(temp_archive_path):
-                os.remove(temp_archive_path)
-        logging.info("Processing of %s finished", dataset)
-    logging.info("Fetch finished, enjoy your tables!")
-
-
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -111,19 +111,6 @@ function start_server
    fi

    echo "ClickHouse server pid '$server_pid' started and responded"
-
-    echo "
-set follow-fork-mode child
-handle all noprint
-handle SIGSEGV stop print
-handle SIGBUS stop print
-handle SIGABRT stop print
-continue
-thread apply all backtrace
-continue
-" > script.gdb
-
-    gdb -batch -command script.gdb -p "$server_pid" &
 }

 function clone_root
@ -186,6 +173,8 @@ function clone_submodules
            contrib/dragonbox
            contrib/fast_float
            contrib/NuRaft
+            contrib/jemalloc
+            contrib/replxx
        )

        git submodule sync
@ -206,6 +195,8 @@ function run_cmake
        "-DENABLE_THINLTO=0"
        "-DUSE_UNWIND=1"
        "-DENABLE_NURAFT=1"
+        "-DENABLE_JEMALLOC=1"
+        "-DENABLE_REPLXX=1"
    )

    # TODO remove this? we don't use ccache anyway. An option would be to download it
@ -266,7 +257,13 @@ function run_tests
    start_server

    set +e
-    time clickhouse-test --hung-check -j 8 --order=random \
+    local NPROC
+    NPROC=$(nproc)
+    NPROC=$((NPROC / 2))
+    if [[ $NPROC == 0 ]]; then
+      NPROC=1
+    fi
+    time clickhouse-test --hung-check -j "${NPROC}" --order=random \
            --fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \
            -- "$FASTTEST_FOCUS" 2>&1 \
        | ts '%Y-%m-%d %H:%M:%S' \
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -1,5 +1,5 @@
 #!/bin/bash
-# shellcheck disable=SC2086,SC2001,SC2046
+# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031

 set -eux
 set -o pipefail
@ -35,7 +35,7 @@ function clone
            fi
            git diff --name-only master HEAD | tee ci-changed-files.txt
        else
-            if [ -v COMMIT_SHA ]; then
+            if [ -v SHA_TO_TEST ]; then
                git fetch --depth 2 origin "$SHA_TO_TEST"
                git checkout "$SHA_TO_TEST"
                echo "Checked out nominal SHA $SHA_TO_TEST for master"
@ -52,9 +52,21 @@ function clone

 }

+# Download the URL passed as $1 with wget, making up to four attempts and
+# pausing 0.5 seconds after every failed one.
+# Returns 0 as soon as an attempt succeeds, 1 if all four attempts fail.
+function wget_with_retry
+{
+    for _ in 1 2 3 4; do
+        # -c resumes a partially downloaded file left by a previous attempt
+        wget -nv -nd -c "$1" && return 0
+        sleep 0.5
+    done
+    return 1
+}
+
 function download
 {
-    wget -nv -nd -c "$BINARY_URL_TO_DOWNLOAD"
+    wget_with_retry "$BINARY_URL_TO_DOWNLOAD"

    chmod +x clickhouse
    ln -s ./clickhouse ./clickhouse-server
@ -155,21 +167,47 @@ function fuzz

    kill -0 $server_pid

+    # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
+    # and clickhouse-server can do fork-exec, for example, to run some bridge.
+    # Do not set nostop noprint for all signals, because it may cause gdb to hang,
+    # explicitly ignore non-fatal signals that are used by server.
+    # Number of SIGRTMIN can be determined only in runtime.
+    RTMIN=$(kill -l SIGRTMIN)
    echo "
-set follow-fork-mode child
-handle all noprint
-handle SIGSEGV stop print
-handle SIGBUS stop print
-continue
-thread apply all backtrace
+set follow-fork-mode parent
+handle SIGHUP nostop noprint pass
+handle SIGINT nostop noprint pass
+handle SIGQUIT nostop noprint pass
+handle SIGPIPE nostop noprint pass
+handle SIGTERM nostop noprint pass
+handle SIGUSR1 nostop noprint pass
+handle SIGUSR2 nostop noprint pass
+handle SIG$RTMIN nostop noprint pass
+info signals
 continue
+backtrace full
+info locals
+info registers
+disassemble /s
+up
+info locals
+disassemble /s
+up
+info locals
+disassemble /s
+p \"done\"
+detach
+quit
 " > script.gdb

-    gdb -batch -command script.gdb -p $server_pid &
+    gdb -batch -command script.gdb -p $server_pid  &
+    sleep 5
+    # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
+    time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:

    # Check connectivity after we attach gdb, because it might cause the server
-    # to freeze and the fuzzer will fail.
-    for _ in {1..60}
+    # to freeze and the fuzzer will fail. In debug build it can take a lot of time.
+    for _ in {1..180}
    do
        sleep 1
        if clickhouse-client --query "select 1"
@ -189,6 +227,7 @@ continue
        --receive_data_timeout_ms=10000 \
        --stacktrace \
        --query-fuzzer-runs=1000 \
+        --testmode \
        --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
        $NEW_TESTS_OPT \
        > >(tail -n 100000 > fuzzer.log) \
--- a/docker/test/integration/base/Dockerfile
+++ b/docker/test/integration/base/Dockerfile
@ -7,7 +7,6 @@ RUN apt-get update \
    && env DEBIAN_FRONTEND=noninteractive apt-get -y install \
        tzdata \
        python3 \
-        libreadline-dev \
        libicu-dev \
        bsdutils \
        gdb \
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@ -21,7 +21,6 @@ RUN apt-get update \
    cgroupfs-mount \
    python3-pip \
    tzdata \
-    libreadline-dev \
    libicu-dev \
    bsdutils \
    curl \
@ -73,11 +72,13 @@ RUN python3 -m pip install \
    grpcio-tools \
    kafka-python \
    kazoo \
+    lz4 \
    minio \
    protobuf \
    psycopg2-binary==2.8.6 \
-    pymongo \
+    pymongo==3.11.0 \
    pytest \
+    pytest-order==1.0.0 \
    pytest-timeout \
    pytest-xdist \
    pytest-repeat \
@ -86,7 +87,8 @@ RUN python3 -m pip install \
    tzlocal==2.1 \
    urllib3 \
    requests-kerberos \
-    pyhdfs
+    pyhdfs \
+    azure-storage-blob

 COPY modprobe.sh /usr/local/bin/modprobe
 COPY dockerd-entrypoint.sh /usr/local/bin/
--- a/docker/test/integration/runner/compose/docker_compose_azurite.yml
+++ b/docker/test/integration/runner/compose/docker_compose_azurite.yml
@ -0,0 +1,13 @@
+# Compose definition of a single Azurite container that exposes only the
+# blob service, for use by the integration test runner.
+version: '2.3'
+
+services:
+  azurite1:
+    image: mcr.microsoft.com/azure-storage/azurite
+    ports:
+      # Host port 10000 -> container port 10000 (the --blobPort below).
+      - "10000:10000"
+    volumes:
+      # Named volume declared at the bottom of this file.
+      # NOTE(review): the command below passes no --location, so whether
+      # Azurite actually stores its data under /data1 is not visible here.
+      - data1-1:/data1
+    # Start the blob service only, listening on all interfaces, with the
+    # debug log written to /azurite_log inside the container.
+    command: azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log
+
+volumes:
+  data1-1:
--- a/docker/test/integration/runner/compose/docker_compose_mongo.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml
@ -1,7 +1,7 @@
 version: '2.3'
 services:
    mongo1:
-        image: mongo:3.6
+        image: mongo:5.0
        restart: always
        environment:
            MONGO_INITDB_ROOT_USERNAME: root
@ -9,3 +9,9 @@ services:
        ports:
            - ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT}
        command: --profile=2 --verbose
+
+    mongo2:
+        image: mongo:5.0
+        restart: always
+        ports:
+            - "27018:27017"
--- a/docker/test/integration/runner/dockerd-entrypoint.sh
+++ b/docker/test/integration/runner/dockerd-entrypoint.sh
@ -8,8 +8,8 @@ echo '{
    "ip-forward": true,
    "log-level": "debug",
    "storage-driver": "overlay2",
-    "insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
-    "registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"]
+    "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
+    "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
 }' | dd of=/etc/docker/daemon.json 2>/dev/null

 dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log &
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -193,7 +193,7 @@ function run_tests
    then
        # Run only explicitly specified tests, if any.
        # shellcheck disable=SC2010
-        test_files=$(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}")
+        test_files=($(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}"))
    elif [ "$PR_TO_TEST" -ne 0 ] \
        && [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \
        && [ "$(wc -l < other-changed-files.txt)" -eq 0 ]
@ -201,10 +201,26 @@ function run_tests
        # If only the perf tests were changed in the PR, we will run only these
        # tests. The lists of changed files are prepared in entrypoint.sh because
        # it has the repository.
-        test_files=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt)
+        test_files=($(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt))
    else
        # The default -- run all tests found in the test dir.
-        test_files=$(ls "$test_prefix"/*.xml)
+        test_files=($(ls "$test_prefix"/*.xml))
+    fi
+
+    # We split perf tests into multiple checks to make them faster.
+    # A check keeps only the tests for which
+    #   hash(test file) % CHPC_TEST_RUN_BY_HASH_TOTAL == CHPC_TEST_RUN_BY_HASH_NUM
+    if [ -v CHPC_TEST_RUN_BY_HASH_TOTAL ]; then
+        # filter tests array in bash https://stackoverflow.com/a/40375567
+        for index in "${!test_files[@]}"; do
+            # Hash the array element itself: a bare 'test_files[$index]' would
+            # feed the literal text "test_files[N]" to perl, so the group would
+            # depend on the position in the list instead of the test name.
+            # "Q" is double-quoted so that it stays inside the single-quoted
+            # shell argument and reaches perl as a proper string.
+            test_hash_result=$(echo "${test_files[$index]}" | perl -ne 'use Digest::MD5 qw(md5); print unpack("Q", md5($_)) % $ENV{CHPC_TEST_RUN_BY_HASH_TOTAL} == $ENV{CHPC_TEST_RUN_BY_HASH_NUM};')
+            # perl prints "1" when the comparison is true and an empty string
+            # (not "0") when it is false
+            if [ "$test_hash_result" != "1" ]; then
+                # deleting element from array
+                unset -v 'test_files[$index]'
+            fi
+        done
+        # re-pack the array to have sequential indexes again
+        test_files=("${test_files[@]}")
     fi

    # For PRs w/o changes in test definitons, test only a subset of queries,
@ -212,21 +228,26 @@ function run_tests
    # already set, keep those values.
    #
    # NOTE: too high CHPC_RUNS/CHPC_MAX_QUERIES may hit internal CI timeout.
-    if [ "$PR_TO_TEST" -ne 0 ] && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ]
-    then
-        CHPC_RUNS=${CHPC_RUNS:-7}
-        CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
-    else
-        CHPC_RUNS=${CHPC_RUNS:-13}
-        CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
-    fi
+    # NOTE: Currently we disabled complete run even for master branch
+    #if [ "$PR_TO_TEST" -ne 0 ] && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ]
+    #then
+    #    CHPC_RUNS=${CHPC_RUNS:-7}
+    #    CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
+    #else
+    #    CHPC_RUNS=${CHPC_RUNS:-13}
+    #    CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
+    #fi
+
+    CHPC_RUNS=${CHPC_RUNS:-7}
+    CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
+
    export CHPC_RUNS
    export CHPC_MAX_QUERIES

    # Determine which concurrent benchmarks to run. For now, the only test
    # we run as a concurrent benchmark is 'website'. Run it as benchmark if we
    # are also going to run it as a normal test.
-    for test in $test_files; do echo "$test"; done | sed -n '/website/p' > benchmarks-to-run.txt
+    for test in ${test_files[@]}; do echo "$test"; done | sed -n '/website/p' > benchmarks-to-run.txt

    # Delete old report files.
    for x in {test-times,wall-clock-times}.tsv
@ -235,8 +256,8 @@ function run_tests
        touch "$x"
    done

-    # Randomize test order.
-    test_files=$(for f in $test_files; do echo "$f"; done | sort -R)
+    # Randomize test order. Note that after this test_files is a plain string, not an array any more.
+    test_files=$(for f in ${test_files[@]}; do echo "$f"; done | sort -R)

    # Limit profiling time to 10 minutes, not to run for too long.
    profile_seconds_left=600
@ -261,16 +282,24 @@ function run_tests
        # Use awk because bash doesn't support floating point arithmetic.
        profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }")

-        TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
-        # The grep is to filter out set -x output and keep only time output.
-        # The '2>&1 >/dev/null' redirects stderr to stdout, and discards stdout.
-        { \
-            time "$script_dir/perf.py" --host localhost localhost --port $LEFT_SERVER_PORT $RIGHT_SERVER_PORT \
-                --runs "$CHPC_RUNS" --max-queries "$CHPC_MAX_QUERIES" \
-                --profile-seconds "$profile_seconds" \
-                -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \
-        } 2>&1 >/dev/null | tee >(grep -v ^+ >> "wall-clock-times.tsv") \
-            || echo "Test $test_name failed with error code $?" >> "$test_name-err.log"
+        (
+            set +x
+            argv=(
+                --host localhost localhost
+                --port "$LEFT_SERVER_PORT" "$RIGHT_SERVER_PORT"
+                --runs "$CHPC_RUNS"
+                --max-queries "$CHPC_MAX_QUERIES"
+                --profile-seconds "$profile_seconds"
+
+                "$test"
+            )
+            TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
+            # one more subshell to suppress trace output for "set +x"
+            (
+                time "$script_dir/perf.py" "${argv[@]}" > "$test_name-raw.tsv" 2> "$test_name-err.log"
+            ) 2>>wall-clock-times.tsv >/dev/null \
+                || echo "Test $test_name failed with error code $?" >> "$test_name-err.log"
+        ) 2>/dev/null

        profile_seconds_left=$(awk -F'	' \
            'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \
@ -278,8 +307,6 @@ function run_tests
        current_test=$((current_test + 1))
    done

-    unset TIMEFORMAT
-
    wait
 }

@ -291,7 +318,7 @@ function get_profiles_watchdog

    for pid in $(pgrep -f clickhouse)
    do
-        gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
+        sudo gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
    done
    wait

@ -518,7 +545,9 @@ unset IFS
 # all nodes.
 numactl --show
 numactl --cpunodebind=all --membind=all numactl --show
-numactl --cpunodebind=all --membind=all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
+# Use less jobs to avoid OOM. Some queries can consume 8+ GB of memory.
+jobs_count=$(($(grep -c ^processor /proc/cpuinfo) / 3))
+numactl --cpunodebind=all --membind=all parallel --jobs  $jobs_count --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log

 clickhouse-local --query "
 -- Join the metric names back to the metric statistics we've calculated, and make
--- a/docker/test/performance-comparison/download.sh
+++ b/docker/test/performance-comparison/download.sh
@ -16,16 +16,28 @@ right_sha=$4
 datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}

 declare -A dataset_paths
-dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
-dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
-dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
-dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
+if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
+    dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
+    dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
+    dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
+    dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
+else
+    dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
+    dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
+    dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
+    dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
+fi
+

 function download
 {
    # Historically there were various paths for the performance test package.
    # Test all of them.
-    for path in "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/"{,clickhouse_build_check/}"performance/performance.tgz"
+    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz"
+                            "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz"
+                           )
+
+    for path in "${urls_to_try[@]}"
    do
        if curl --fail --head "$path"
        then
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@ -4,6 +4,27 @@ set -ex
 CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
 export CHPC_CHECK_START_TIMESTAMP

+S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"}
+
+COMMON_BUILD_PREFIX="/clickhouse_build_check"
+if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
+    COMMON_BUILD_PREFIX=""
+fi
+
+# Sometimes AWS responds with a DNS error, and it's impossible to retry it with
+# the options of the current curl version.
+function curl_with_retry
+{
+    for _ in 1 2 3 4; do
+        if curl --fail --head "$1";then
+            return 0
+        else
+            sleep 0.5
+        fi
+    done
+    return 1
+}
+
 # Use the packaged repository to find the revision we will compare to.
 function find_reference_sha
 {
@ -43,9 +64,12 @@ function find_reference_sha
        # Historically there were various path for the performance test package,
        # test all of them.
        unset found
-        for path in "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/"{,clickhouse_build_check/}"performance/performance.tgz"
+        declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz"
+                                "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz"
+                               )
+        for path in "${urls_to_try[@]}"
        do
-            if curl --fail --head "$path"
+            if curl_with_retry "$path"
            then
                found="$path"
                break
@ -65,14 +89,11 @@ chmod 777 workspace output

 cd workspace

-# Download the package for the version we are going to test
-for path in "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/"{,clickhouse_build_check/}"performance/performance.tgz"
-do
-    if curl --fail --head "$path"
-    then
-        right_path="$path"
-    fi
-done
+# Download the package for the version we are going to test.
+if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
+then
+    right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
+fi

 mkdir right
 wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@ -45,6 +45,7 @@ parser.add_argument('--runs', type=int, default=1, help='Number of query runs pe
 parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
 parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
 parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
+parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
 parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
 parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
 parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
@ -284,7 +285,7 @@ for query_index in queries_to_run:
                #   it makes the results unstable.
                res = c.execute(q, query_id = prewarm_id,
                    settings = {
-                        'max_execution_time': args.max_query_seconds,
+                        'max_execution_time': args.prewarm_max_query_seconds,
                        'query_profiler_real_time_period_ns': 10000000,
                        'memory_profiler_step': '4Mi',
                    })
@ -354,11 +355,9 @@ for query_index in queries_to_run:
            print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')

            if elapsed > args.max_query_seconds:
-                # Stop processing pathologically slow queries, to avoid timing out
-                # the entire test task. This shouldn't really happen, so we don't
-                # need much handling for this case and can just exit.
+                # Do not stop processing pathologically slow queries,
+                # since this may hide errors in other queries.
                print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
-                exit(2)

        # Be careful with the counter, after this line it's the next iteration
        # already.
--- a/docker/test/pvs/Dockerfile
+++ b/docker/test/pvs/Dockerfile
@ -42,7 +42,7 @@ ENV CCACHE_DIR=/test_output/ccache
 CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic  \
    && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"DISABLE_HERMETIC_BUILD"=ON -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \
    && ninja re2_st clickhouse_grpc_protos \
-    && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
+    && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j "$(nproc)" -l ./licence.lic; \
    cp /repo_folder/pvs-studio.log /test_output; \
    plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \
    plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@ -123,7 +123,12 @@ function run_tests()
 export -f run_tests
 timeout "$MAX_RUN_TIME" bash -c run_tests ||:

-./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
+echo "Files in current directory"
+ls -la ./
+echo "Files in root directory"
+ls -la /
+
+/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

 grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:

--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -49,7 +49,6 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 ENV NUM_TRIES=1
 ENV MAX_RUN_TIME=0

-
 # Download Minio-related binaries
 RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \
    && chmod +x ./minio \
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -96,6 +96,13 @@ function run_tests()
        ADDITIONAL_OPTIONS+=('8')
    fi

+    if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then
+        ADDITIONAL_OPTIONS+=('--run-by-hash-num')
+        ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM")
+        ADDITIONAL_OPTIONS+=('--run-by-hash-total')
+        ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL")
+    fi
+
    set +e
    clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
            --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
@ -108,7 +115,12 @@ export -f run_tests

 timeout "$MAX_RUN_TIME" bash -c run_tests ||:

-./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
+echo "Files in current directory"
+ls -la ./
+echo "Files in root directory"
+ls -la /
+
+/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

 clickhouse-client -q "system flush logs" ||:

--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@ -1,6 +1,7 @@
 #!/bin/bash
 # shellcheck disable=SC2094
 # shellcheck disable=SC2086
+# shellcheck disable=SC2024

 set -x

@ -127,14 +128,35 @@ function start()
        counter=$((counter + 1))
    done

+    # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
+    # and clickhouse-server can do fork-exec, for example, to run some bridge.
+    # Do not set nostop noprint for all signals, because it may sometimes cause gdb to hang;
+    # explicitly ignore only the non-fatal signals that are used by the server.
+    # The number of SIGRTMIN can be determined only at runtime.
+    RTMIN=$(kill -l SIGRTMIN)
    echo "
-set follow-fork-mode child
-handle all noprint
-handle SIGSEGV stop print
-handle SIGBUS stop print
-handle SIGABRT stop print
+set follow-fork-mode parent
+handle SIGHUP nostop noprint pass
+handle SIGINT nostop noprint pass
+handle SIGQUIT nostop noprint pass
+handle SIGPIPE nostop noprint pass
+handle SIGTERM nostop noprint pass
+handle SIGUSR1 nostop noprint pass
+handle SIGUSR2 nostop noprint pass
+handle SIG$RTMIN nostop noprint pass
+info signals
 continue
-thread apply all backtrace
+backtrace full
+info locals
+info registers
+disassemble /s
+up
+info locals
+disassemble /s
+up
+info locals
+disassemble /s
+p \"done\"
 detach
 quit
 " > script.gdb
@ -142,7 +164,10 @@ quit
    # FIXME Hung check may work incorrectly because of attached gdb
    # 1. False positives are possible
    # 2. We cannot attach another gdb to get stacktraces if some queries hung
-    gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log &
+    gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
+    sleep 5
+    # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
+    time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
 }

 configure
@ -213,6 +238,9 @@ zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/n
 zgrep -Fa "########################################" /test_output/* > /dev/null \
    && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv

+zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
+    && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv
+
 # Put logs into /test_output/
 for log_file in /var/log/clickhouse-server/clickhouse-server.log*
 do
--- a/docker/test/testflows/runner/Dockerfile
+++ b/docker/test/testflows/runner/Dockerfile
@ -21,7 +21,6 @@ RUN apt-get update \
    cgroupfs-mount \
    python3-pip \
    tzdata \
-    libreadline-dev \
    libicu-dev \
    bsdutils \
    curl \
--- a/docker/test/testflows/runner/dockerd-entrypoint.sh
+++ b/docker/test/testflows/runner/dockerd-entrypoint.sh
@ -5,8 +5,8 @@ echo "Configure to use Yandex dockerhub-proxy"
 mkdir -p /etc/docker/
 cat > /etc/docker/daemon.json << EOF
 {
-    "insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
-    "registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"]
+    "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
+    "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
 }
 EOF

--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@ -106,20 +106,20 @@ Build ClickHouse. Run ClickHouse from the terminal: change directory to `program

 Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`.

-Alternatively you can install ClickHouse package: either stable release from Yandex repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo service clickhouse-server start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`.
+Alternatively you can install the ClickHouse package: either a stable release from the ClickHouse repository, or you can build the package yourself with `./release` in the ClickHouse sources root. Then start the server with `sudo clickhouse start` (or `sudo clickhouse stop` to stop the server). Look for logs at `/var/log/clickhouse-server/clickhouse-server.log`.

 When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary:

 ``` bash
-$ sudo service clickhouse-server stop
+$ sudo clickhouse stop
 $ sudo cp ./clickhouse /usr/bin/
-$ sudo service clickhouse-server start
+$ sudo clickhouse start
 ```

 Also you can stop system clickhouse-server and run your own with the same configuration but with logging to terminal:

 ``` bash
-$ sudo service clickhouse-server stop
+$ sudo clickhouse stop
 $ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
 ```

@ -257,9 +257,9 @@ There are five variants (Debug, ASan, TSan, MSan, UBSan).

 Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuzzing that allows to randomize thread order of execution. It helps to find even more special cases.

-## Security Audit {#security-audit}
+## Security Audit

-People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint.
+People from Yandex Security Team did some basic overview of ClickHouse capabilities from the security standpoint.

 ## Static Analyzers {#static-analyzers}

@ -326,15 +326,11 @@ There is automated check for flaky tests. It runs all new tests 100 times (for f

 ## Testflows

-[Testflows](https://testflows.com/) is an enterprise-grade testing framework. It is used by Altinity for some of the tests and we run these tests in our CI.
-
-## Yandex Checks (only for Yandex employees)
-
-These checks are importing ClickHouse code into Yandex internal monorepository, so ClickHouse codebase can be used as a library by other products at Yandex (YT and YDB). Note that clickhouse-server itself is not being build from internal repo and unmodified open-source build is used for Yandex applications.
+[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse.

 ## Test Automation {#test-automation}

-We run tests with Yandex internal CI and job automation system named “Sandbox”.
+We run tests with [GitHub Actions](https://github.com/features/actions).

 Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.

--- a/docs/en/engines/database-engines/materialized-mysql.md
+++ b/docs/en/engines/database-engines/materialized-mysql.md
@ -17,6 +17,7 @@ ClickHouse server works as MySQL replica. It reads binlog and performs DDL and D
 ``` sql
 CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
 ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
+[TABLE OVERRIDE table1 (...), TABLE OVERRIDE table2 (...)]
 ```

 **Engine Parameters**
@ -82,6 +83,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
 | VARCHAR, VAR_STRING     | [String](../../sql-reference/data-types/string.md)           |
 | BLOB                    | [String](../../sql-reference/data-types/string.md)           |
 | BINARY                  | [FixedString](../../sql-reference/data-types/fixedstring.md) |
+| BIT                     | [UInt64](../../sql-reference/data-types/int-uint.md)         |

 [Nullable](../../sql-reference/data-types/nullable.md) is supported.

@ -109,15 +111,19 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([

 - MySQL `DELETE` query is converted into `INSERT` with `_sign=-1`.

- MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1`.
+- MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1` if the primary key has been changed, or
+  `INSERT` with `_sign=1` if not.

 ### Selecting from MaterializedMySQL Tables {#select}

 `SELECT` query from `MaterializedMySQL` tables has some specifics:

- If `_version` is not specified in the `SELECT` query, [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used. So only rows with `MAX(_version)` are selected.
+- If `_version` is not specified in the `SELECT` query, the
+  [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used, so only rows with
+  `MAX(_version)` are returned for each primary key value.

- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default. So the deleted rows are not included into the result set.
+- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default. So the deleted rows are not
+  included into the result set.

 - The result includes columns comments in case they exist in MySQL database tables.

@ -125,15 +131,95 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([

 MySQL `PRIMARY KEY` and `INDEX` clauses are converted into `ORDER BY` tuples in ClickHouse tables.

-ClickHouse has only one physical order, which is determined by `ORDER BY` clause. To create a new physical order, use [materialized views](../../sql-reference/statements/create/view.md#materialized).
+ClickHouse has only one physical order, which is determined by `ORDER BY` clause. To create a new physical order, use
+[materialized views](../../sql-reference/statements/create/view.md#materialized).

 **Notes**

 - Rows with `_sign=-1` are not deleted physically from the tables.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine.
+- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine, as they are not visible in the
+  MySQL binlog.
 - Replication can be easily broken.
 - Manual operations on database and tables are forbidden.
- `MaterializedMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializedMySQL` database when a table in the MySQL server changes.
+- `MaterializedMySQL` is affected by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert)
+  setting. Data is merged in the corresponding table in the `MaterializedMySQL` database when a table in the MySQL
+  server changes.
+
+### Table Overrides {#table-overrides}
+
+Table overrides can be used to customize the ClickHouse DDL queries, allowing you to make schema optimizations for your
+application. This is especially useful for controlling partitioning, which is important for the overall performance of
+MaterializedMySQL.
+
+These are the schema conversion manipulations you can do with table overrides for MaterializedMySQL:
+
+ * Modify column type. Must be compatible with the original type, or replication will fail. For example,
+   you can modify a UInt32 column to UInt64, but you can not modify a String column to Array(String).
+ * Modify [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl).
+ * Modify [column compression codec](../../sql-reference/statements/create/table/#codecs).
+ * Add [ALIAS columns](../../sql-reference/statements/create/table/#alias).
+ * Add [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
+ * Add [projections](../table-engines/mergetree-family/mergetree/#projections). Note that projection optimizations are
+   disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here.
+   `INDEX ... TYPE hypothesis` as [described in the v21.12 blog post](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)
+   may be more useful in this case.
+ * Modify [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/)
+ * Modify [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+ * Modify [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+ * Add [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+ * Add [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+
+```sql
+CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
+[SETTINGS ...]
+[TABLE OVERRIDE table_name (
+    [COLUMNS (
+        [col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...]
+        [INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...]
+        [PROJECTION projection_name (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]), ...]
+    )]
+    [ORDER BY expr]
+    [PRIMARY KEY expr]
+    [PARTITION BY expr]
+    [SAMPLE BY expr]
+    [TTL expr]
+), ...]
+```
+
+Example:
+
+```sql
+CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
+TABLE OVERRIDE table1 (
+    COLUMNS (
+        userid UUID,
+        category LowCardinality(String),
+        timestamp DateTime CODEC(Delta, Default)
+    )
+    PARTITION BY toYear(timestamp)
+),
+TABLE OVERRIDE table2 (
+    COLUMNS (
+        client_ip String TTL created + INTERVAL 72 HOUR
+    )
+    SAMPLE BY ip_hash
+)
+```
+
+The `COLUMNS` list is sparse; existing columns are modified as specified, extra ALIAS columns are added. It is not
+possible to add ordinary or MATERIALIZED columns.  Modified columns with a different type must be assignable from the
+original type. There is currently no validation of this or similar issues when the `CREATE DATABASE` query executes, so
+extra care needs to be taken.
+
+You may specify overrides for tables that do not exist yet.
+
+!!! warning "Warning"
+    It is easy to break replication with table overrides if not used with care. For example:
+    
+    * If an ALIAS column is added with a table override, and a column with the same name is later added to the source
+      MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops.
+    * It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in
+      `ORDER BY` or `PARTITION BY`. This will produce CREATE TABLE queries that fail, which also causes replication to stop.

 ## Examples of Use {#examples-of-use}

@ -150,11 +236,9 @@ mysql> SELECT * FROM test;
 ```

 ```text
-+---+------+------+
-| a |    b |    c |
-+---+------+------+
-| 2 |  222 | Wow! |
-+---+------+------+
+┌─a─┬───b─┬─c────┐
+│ 2 │ 222 │ Wow! │
+└───┴─────┴──────┘
 ```

 Database in ClickHouse, exchanging data with the MySQL server:
--- a/docs/en/engines/database-engines/materialized-postgresql.md
+++ b/docs/en/engines/database-engines/materialized-postgresql.md
@ -5,15 +5,15 @@ toc_title: MaterializedPostgreSQL

 # [experimental] MaterializedPostgreSQL {#materialize-postgresql}

-Creates ClickHouse database with an initial data dump of PostgreSQL database tables and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL database tables in the remote PostgreSQL database.
+Creates a ClickHouse database with tables from a PostgreSQL database. First, a database with the `MaterializedPostgreSQL` engine creates a snapshot of the PostgreSQL database and loads the required tables. The required tables can include any subset of tables from any subset of schemas of the specified database. Along with the snapshot, the database engine acquires the LSN, and once the initial dump of tables is complete, it starts pulling updates from the WAL. After the database is created, tables newly added to the PostgreSQL database are not automatically added to replication. They have to be added manually with the `ATTACH TABLE db.table` query.

-ClickHouse server works as PostgreSQL replica. It reads WAL and performs DML queries. DDL is not replicated, but can be handled (described below).
+Replication is implemented with the PostgreSQL Logical Replication Protocol, which does not allow replicating DDL, but does make it possible to detect whether replication-breaking changes have happened (column type changes, adding/removing columns). Such changes are detected, and the affected tables stop receiving updates. Such tables can be automatically reloaded in the background if the required setting (`materialized_postgresql_allow_automatic_update`) is turned on. The safest way for now is to use `ATTACH`/`DETACH` queries to reload the table completely. If a DDL change does not break replication (for example, renaming a column), the table will still receive updates (insertion is done by position).

 ## Creating a Database {#creating-a-database}

 ``` sql
 CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
-ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
+ENGINE = MaterializedPostgreSQL('host:port', 'database', 'user', 'password') [SETTINGS ...]
 ```

 **Engine Parameters**
@ -23,51 +23,39 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p
 -   `user` — PostgreSQL user.
 -   `password` — User password.

+## Example of Use {#example-of-use}
+
+``` sql
+CREATE DATABASE postgres_db
+ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
+
+SHOW TABLES FROM postgres_db;
+
+┌─name───┐
+│ table1 │
+└────────┘
+
+SELECT * FROM postgres_db.table1;
+```
+
 ## Dynamically adding new tables to replication {#dynamically-adding-table-to-replication}

+After a `MaterializedPostgreSQL` database is created, it does not automatically detect new tables in the corresponding PostgreSQL database. Such tables can be added manually:
+
 ``` sql
 ATTACH TABLE postgres_database.new_table;
 ```

-When specifying a specific list of tables in the database using the setting [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list), it will be updated to the current state, taking into account the tables which were added by the `ATTACH TABLE` query.
+Warning: before version 21.13, adding a table to replication left behind a temporary replication slot (named `{db_name}_ch_replication_slot_tmp`) that was not removed. If you attach tables in a ClickHouse version before 21.13, make sure to delete the slot manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. The issue is fixed in 21.13.

 ## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}

+It is possible to remove specific tables from replication:
+
 ``` sql
 DETACH TABLE postgres_database.table_to_remove;
 ```

-## Settings {#settings}
-
-   [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list)
-
-   [materialized_postgresql_schema](../../operations/settings/settings.md#materialized-postgresql-schema)
-
-   [materialized_postgresql_schema_list](../../operations/settings/settings.md#materialized-postgresql-schema-list)
-
-   [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update)
-
-   [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
-
-   [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot)
-
-   [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot)
-
-``` sql
-CREATE DATABASE database1
-ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
-SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
-
-SELECT * FROM database1.table1;
-```
-
-The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query.
-
-``` sql
-ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
-```
-
-
 ## PostgreSQL schema {#schema}

 PostgreSQL [schema](https://www.postgresql.org/docs/9.1/ddl-schemas.html) can be configured in 3 ways (starting from version 21.12).
@ -150,13 +138,63 @@ WHERE oid = 'postgres_table'::regclass;
 !!! warning "Warning"
    Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used.

-## Example of Use {#example-of-use}
+## Settings {#settings}
+
+1. materialized_postgresql_tables_list {#materialized-postgresql-tables-list}
+
+Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine.
+
+Default value: empty list — means whole PostgreSQL database will be replicated.
+
+2. materialized_postgresql_schema {#materialized-postgresql-schema}
+
+Default value: empty string. (Default schema is used)
+
+3. materialized_postgresql_schema_list {#materialized-postgresql-schema-list}
+
+Default value: empty list. (Default schema is used)
+
+4. materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
+
+Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them.
+
+Possible values:
+
+-   0 — The table is not automatically updated in the background, when schema changes are detected.
+-   1 — The table is automatically updated in the background, when schema changes are detected.
+
+Default value: `0`.
+
+5. materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size}
+
+Sets the number of rows collected in memory before flushing data into PostgreSQL database table.
+
+Possible values:
+
+-   Positive integer.
+
+Default value: `65536`.
+
+6. materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot}
+
+A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`.
+
+7. materialized_postgresql_snapshot {#materialized-postgresql-snapshot}
+
+A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with `materialized_postgresql_replication_slot`.

 ``` sql
-CREATE DATABASE postgresql_db
-ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
+CREATE DATABASE database1
+ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
+SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';

-SELECT * FROM postgresql_db.postgres_table;
+SELECT * FROM database1.table1;
+```
+
+The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query.
+
+``` sql
+ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
 ```

 ## Notes {#notes}
@ -165,11 +203,11 @@ SELECT * FROM postgresql_db.postgres_table;

 Logical Replication Slots which exist on the primary are not available on standby replicas.
 So if there is a failover, new primary (the old physical standby) won’t be aware of any slots which were existing with old primary. This will lead to a broken replication from PostgreSQL.
-A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass slot name via [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot) setting, and it has to be exported with `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot) setting.
+A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass slot name via `materialized_postgresql_replication_slot` setting, and it has to be exported with `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via `materialized_postgresql_snapshot` setting.

 Please note that this should be used only if it is actually needed. If there is no real need for that or full understanding why, then it is better to allow the table engine to create and manage its own replication slot.

-**Example (from [@bchrobot](https://github.com/bchrobot))** 
+**Example (from [@bchrobot](https://github.com/bchrobot))**

 1. Configure replication slot in PostgreSQL.

@ -214,3 +252,23 @@ SETTINGS
 ```bash
 kubectl exec acid-demo-cluster-0 -c postgres -- su postgres -c 'patronictl failover --candidate acid-demo-cluster-1 --force'
 ```
+
+### Required permissions
+
+1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
+
+2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
+
+3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.
+
+4. [DROP PUBLICATION](https://postgrespro.ru/docs/postgresql/10/sql-droppublication) -- owner of publication (`username` in MaterializedPostgreSQL engine itself).
+
+It is possible to avoid executing commands `2` and `3`, and therefore needing those permissions, by using the settings `materialized_postgresql_replication_slot` and `materialized_postgresql_snapshot`. This should be done with great care.
+
+Access to tables:
+
+1. pg_publication
+
+2. pg_replication_slots
+
+3. pg_publication_tables
--- a/docs/en/engines/table-engines/integrations/hdfs.md
+++ b/docs/en/engines/table-engines/integrations/hdfs.md
@ -5,8 +5,7 @@ toc_title: HDFS

 # HDFS {#table_engines-hdfs}

-This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar
-to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
+This engine provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.

 ## Usage {#usage}

@ -14,12 +13,13 @@ to the [File](../../../engines/table-engines/special/file.md#table_engines-file)
 ENGINE = HDFS(URI, format)
 ```

-The `URI` parameter is the whole file URI in HDFS.
-The `format` parameter specifies one of the available file formats. To perform
+**Engine Parameters**
+
+- `URI` - whole file URI in HDFS. The path part of `URI` may contain globs. In this case the table would be readonly.
+-  `format` - specifies one of the available file formats. To perform
 `SELECT` queries, the format must be supported for input, and to perform
 `INSERT` queries – for output. The available formats are listed in the
 [Formats](../../../interfaces/formats.md#formats) section.
-The path part of `URI` may contain globs. In this case the table would be readonly.

 **Example:**

@ -71,12 +71,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table

 1.  Suppose we have several files in TSV format with the following URIs on HDFS:

-   'hdfs://hdfs1:9000/some_dir/some_file_1'
-   'hdfs://hdfs1:9000/some_dir/some_file_2'
-   'hdfs://hdfs1:9000/some_dir/some_file_3'
-   'hdfs://hdfs1:9000/another_dir/some_file_1'
-   'hdfs://hdfs1:9000/another_dir/some_file_2'
-   'hdfs://hdfs1:9000/another_dir/some_file_3'
+    -  'hdfs://hdfs1:9000/some_dir/some_file_1'
+    -  'hdfs://hdfs1:9000/some_dir/some_file_2'
+    -  'hdfs://hdfs1:9000/some_dir/some_file_3'
+    -  'hdfs://hdfs1:9000/another_dir/some_file_1'
+    -  'hdfs://hdfs1:9000/another_dir/some_file_2'
+    -  'hdfs://hdfs1:9000/another_dir/some_file_3'

 1.  There are several ways to make a table consisting of all six files:

@ -132,6 +132,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us


 | **parameter**                                         | **default value**       |
+| -                                                     | -                       |
 | rpc\_client\_connect\_tcpnodelay                      | true                    |
 | dfs\_client\_read\_shortcircuit                       | true                    |
 | output\_replace-datanode-on-failure                   | true                    |
@ -181,25 +182,26 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
 #### ClickHouse extras {#clickhouse-extras}

 | **parameter**                                         | **default value**       |
+| -                                                     | -                       |
 |hadoop\_kerberos\_keytab                               | ""                      |
 |hadoop\_kerberos\_principal                            | ""                      |
 |hadoop\_kerberos\_kinit\_command                       | kinit                   |
 |libhdfs3\_conf                                         | ""                      |

 ### Limitations {#limitations}
-  * hadoop\_security\_kerberos\_ticket\_cache\_path and libhdfs3\_conf can be global only, not user specific
+* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be global only, not user specific

 ## Kerberos support {#kerberos-support}

-If hadoop\_security\_authentication parameter has value 'kerberos', ClickHouse authentifies via Kerberos facility.
-Parameters [here](#clickhouse-extras) and hadoop\_security\_kerberos\_ticket\_cache\_path may be of help.
+If the `hadoop_security_authentication` parameter has the value `kerberos`, ClickHouse authenticates via Kerberos.
+The parameters listed [here](#clickhouse-extras) and `hadoop_security_kerberos_ticket_cache_path` may be of help.
 Note that due to libhdfs3 limitations only old-fashioned approach is supported,
-datanode communications are not secured by SASL (HADOOP\_SECURE\_DN\_USER is a reliable indicator of such
-security approach). Use tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh for reference.
+datanode communications are not secured by SASL (`HADOOP_SECURE_DN_USER` is a reliable indicator of such
+security approach). Use `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` for reference.

-If hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal or hadoop\_kerberos\_kinit\_command is specified, kinit will be invoked. hadoop\_kerberos\_keytab and hadoop\_kerberos\_principal are mandatory in this case. kinit tool and krb5 configuration files are required.
+If `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` or `hadoop_kerberos_kinit_command` is specified, `kinit` will be invoked. `hadoop_kerberos_keytab` and `hadoop_kerberos_principal` are mandatory in this case. `kinit` tool and krb5 configuration files are required.

-## HDFS Namenode HA support{#namenode-ha}
+## HDFS Namenode HA support {#namenode-ha}

 libhdfs3 support HDFS namenode HA.

--- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md
+++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md
@ -7,7 +7,7 @@ toc_title: MaterializedPostgreSQL

 Creates ClickHouse table with an initial data dump of PostgreSQL table and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL table in the remote PostgreSQL database.

-If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the [materialized_postgresql_tables_list](../../../operations/settings/settings.md#materialized-postgresql-tables-list) setting, which specifies the tables to be replicated. It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database.
+If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the `materialized_postgresql_tables_list` setting, which specifies the tables to be replicated (it will also be possible to add database `schema`). It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database.

 ## Creating a Table {#creating-a-table}

@ -38,7 +38,7 @@ PRIMARY KEY key;
 -   `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).

 -   `_sign` — Deletion mark. Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values:
-    - `1` — Row is not deleted, 
+    - `1` — Row is not deleted,
    - `-1` — Row is deleted.

 These columns do not need to be added when a table is created. They are always accessible in `SELECT` query.
--- a/docs/en/engines/table-engines/integrations/postgresql.md
+++ b/docs/en/engines/table-engines/integrations/postgresql.md
@ -36,6 +36,31 @@ The table structure can differ from the original PostgreSQL table structure:
 -   `schema` — Non-default table schema. Optional.
 -   `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient.

+or via config (since version 21.11):
+
+```
+<named_collections>
+    <postgres1>
+        <host></host>
+        <port></port>
+        <username></username>
+        <password></password>
+        <table></table>
+    </postgres1>
+    <postgres2>
+        <host></host>
+        <port></port>
+        <username></username>
+        <password></password>
+    </postgres2>
+</named_collections>
+```
+
+Some parameters can be overridden by key-value arguments:
+``` sql
+SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
+```
+
 ## Implementation Details {#implementation-details}

 `SELECT` queries on PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside read-only PostgreSQL transaction with commit after each `SELECT` query.
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@ -37,6 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
    [rabbitmq_skip_broken_messages = N,]
    [rabbitmq_max_block_size = N,]
    [rabbitmq_flush_interval_ms = N]
+    [rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish']
 ```

 Required parameters:
@ -59,6 +60,7 @@ Optional parameters:
 -   `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`
+-   `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue.

 SSL connection:

--- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md
+++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md
@ -66,9 +66,9 @@ WHERE table = 'visits'
 └───────────┴────────────────┴────────┘
 ```

-The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](#alter_manipulations-with-partitions) queries.
+The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries.

-The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](#alter_attach-partition) query.
+The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query.

 Let’s break down the name of the first part: `201901_1_3_1`:

--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@ -8,24 +8,43 @@ toc_title: Distributed
 Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.
 Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.

-The Distributed engine accepts parameters:
+## Creating a Table {#distributed-creating-a-table}

-   the cluster name in the server’s config file
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
+    ...
+) ENGINE = Distributed(cluster, database, table[, sharding_key[, policy_name]])
+[SETTINGS name=value, ...]
+```

-   the name of a remote database
+### From a Table {#distributed-from-a-table}
+When the `Distributed` table is pointing to a table on the current server you can adopt that table's schema:

-   the name of a remote table
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 ENGINE = Distributed(cluster, database, table[, sharding_key[, policy_name]]) [SETTINGS name=value, ...]
+```

-   (optionally) sharding key
+**Distributed Parameters**

-   (optionally) policy name, it will be used to store temporary files for async send
+-   `cluster` - the cluster name in the server’s config file

-    See also:
+-   `database` - the name of a remote database

-    -   [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
-    -   [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
+-   `table` - the name of a remote table

-Also, it accepts the following settings:
+-   `sharding_key` - (optionally) sharding key
+
+-   `policy_name` - (optionally) policy name, it will be used to store temporary files for async send
+
+**See Also**
+
+ - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
+ - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
+
+**Distributed Settings**

 - `fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk.

@ -59,24 +78,25 @@ Also, it accepts the following settings:
    - [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting
    - `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert`

-Example:
+**Example**

 ``` sql
-Distributed(logs, default, hits[, sharding_key[, policy_name]])
+CREATE TABLE hits_all AS hits
+ENGINE = Distributed(logs, default, hits[, sharding_key[, policy_name]])
 SETTINGS
    fsync_after_insert=0,
    fsync_directories=0;
 ```

-Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.
+Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster.
 Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
-For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
+For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.

-Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase().
+Instead of the database name, you can use a constant expression that returns a string. For example: `currentDatabase()`.

-logs – The cluster name in the server’s config file.
+## Clusters {#distributed-clusters}

-Clusters are set like this:
+Clusters are configured in the [server configuration file](../../../operations/configuration-files.md):

 ``` xml
 <remote_servers>
@ -132,12 +152,13 @@ Replicas are duplicating servers (in order to read all the data, you can access
 Cluster names must not contain dots.

 The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
+
 - `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server does not start. If you change the DNS record, restart the server.
- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http_port.
- `user` – Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
+- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Not to be confused with `http_port`.
+- `user` – Name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file. For more information, see the section [Access rights](../../../operations/access-rights.md).
 - `password` – The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and have correct certificates.
- `compression` - Use data compression. Default value: true.
+- `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and be configured with correct certificates.
+- `compression` - Use data compression. Default value: `true`.

 When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
 If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times.
@ -149,40 +170,42 @@ You can specify as many clusters as you wish in the configuration.

 To view your clusters, use the `system.clusters` table.

-The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster’s servers).
+The `Distributed` engine allows working with a cluster like a local server. However, the cluster's configuration cannot be specified dynamically; it has to be configured in the server config file. Usually, all servers in a cluster will have the same cluster config (though this is not required). Clusters from the config file are updated on the fly, without restarting the server.

-The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you do not need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
+If you need to send a query to an unknown set of shards and replicas each time, you do not need to create a `Distributed` table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
+
+## Writing data {#distributed-writing-data}

 There are two methods for writing data to a cluster:

-First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
+First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform direct `INSERT` statements on the remote tables in the cluster that the `Distributed` table is pointing to. This is the most flexible solution as you can use any sharding scheme, even one that is non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.

-Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it does not mean anything in this case.
+Second, you can perform `INSERT` statements on a `Distributed` table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a `Distributed` table, it must have the `sharding_key` parameter configured (except if there is only one shard).

-Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
+Each shard can have a `<weight>` defined in the config file. By default, the weight is `1`. Data is distributed across shards in the amount proportional to the shard weight. All shard weights are summed up, then each shard's weight is divided by the total to determine each shard's proportion. For example, if there are two shards and the first has a weight of 1 while the second has a weight of 2, the first will be sent one third (1 / 3) of inserted rows and the second will be sent two thirds (2 / 3).

-Each shard can have the `internal_replication` parameter defined in the config file.
+Each shard can have the `internal_replication` parameter defined in the config file. If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this if the tables underlying the `Distributed` table are replicated tables (e.g. any of the `Replicated*MergeTree` table engines). One of the table replicas will receive the write and it will be replicated to the other replicas automatically.

-If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
-
-If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
+If `internal_replication` is set to `false` (the default), data is written to all replicas. In this case, the `Distributed` table replicates data itself. This is worse than using replicated tables because the consistency of replicas is not checked and, over time, they will contain slightly different data.

 To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weights` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).

-The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
+The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running `IN` and `JOIN` by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function e.g. `intHash64(UserID)`.

-A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
-
-SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
+A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area rather than using entries in `Distributed` tables.

 You should be concerned about the sharding scheme in the following cases:

-   Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
-   A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
+-   Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient.
+-   A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries.

 Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.

-If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
+If the server ceased to exist or had a rough restart (for example, due to a hardware failure) after an `INSERT` to a `Distributed` table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
+
+## Reading data {#distributed-reading-data}
+
+When querying a `Distributed` table, `SELECT` queries are sent to all shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.

 When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).

--- a/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md
+++ b/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md
@ -0,0 +1,15 @@
+---
+title: How do I contribute code to ClickHouse?
+toc_hidden: true
+toc_priority: 120
+---
+
+# How do I contribute code to ClickHouse? {#how-do-i-contribute-code-to-clickhouse}
+
+ClickHouse is an open-source project [developed on GitHub](https://github.com/ClickHouse/ClickHouse).
+
+As is customary, contribution instructions are published in the [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md) file in the root of the source code repository.
+
+If you want to suggest a substantial change to ClickHouse, consider [opening a GitHub issue](https://github.com/ClickHouse/ClickHouse/issues/new/choose) explaining what you want to do, to discuss it with maintainers and community first. [Examples of such RFC issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc).
+
+If your contributions are security related, please check out [our security policy](https://github.com/ClickHouse/ClickHouse/security/policy/) too.
--- a/docs/en/faq/general/index.md
+++ b/docs/en/faq/general/index.md
@ -17,6 +17,7 @@ Questions:
 -   [What is OLAP?](../../faq/general/olap.md)
 -   [What is a columnar database?](../../faq/general/columnar-database.md)
 -   [Why not use something like MapReduce?](../../faq/general/mapreduce.md)
+-   [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)

 !!! info "Don’t see what you were looking for?"
    Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
--- a/docs/en/faq/operations/index.md
+++ b/docs/en/faq/operations/index.md
@ -11,6 +11,7 @@ Questions:

 -   [Which ClickHouse version to use in production?](../../faq/operations/production.md)
 -   [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md)
+-   [Does ClickHouse support multi-region replication?](../../faq/operations/multi-region-replication.md)

 !!! info "Don’t see what you were looking for?"
    Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
--- a/docs/en/faq/operations/multi-region-replication.md
+++ b/docs/en/faq/operations/multi-region-replication.md
@ -0,0 +1,13 @@
+---
+title: Does ClickHouse support multi-region replication?
+toc_hidden: true
+toc_priority: 30
+---
+
+# Does ClickHouse support multi-region replication? {#does-clickhouse-support-multi-region-replication}
+
+The short answer is "yes". However, we recommend keeping latency between all regions/datacenters in the two-digit millisecond range; otherwise, write performance will suffer as it goes through a distributed consensus protocol. For example, replication between US coasts will likely work fine, but replication between the US and Europe likely won't.
+
+Configuration-wise, there's no difference compared to single-region replication: simply use hosts that are located in different locations for replicas.
+
+For more information, see [full article on data replication](../../engines/table-engines/mergetree-family/replication.md).
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@ -142,6 +142,12 @@ On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sourc

 To start the server as a daemon, run:

+``` bash
+$ sudo clickhouse start
+```
+
+There are also other ways to run ClickHouse:
+
 ``` bash
 $ sudo service clickhouse-server start
 ```
@ -152,6 +158,12 @@ If you do not have `service` command, run as
 $ sudo /etc/init.d/clickhouse-server start
 ```

+If you have `systemctl` command, run as
+
+``` bash
+$ sudo systemctl start clickhouse-server.service
+```
+
 See the logs in the `/var/log/clickhouse-server/` directory.

 If the server does not start, check the configurations in the file `/etc/clickhouse-server/config.xml`.
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@ -204,7 +204,7 @@ When parsing with this format, tabs or linefeeds are not allowed in each field.

 This format is also available under the name `TSVRawWithNames`.

-## TabSeparatedWithNamesAndTypes {#tabseparatedrawwithnamesandtypes}
+## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes}

 Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping.
 When parsing with this format, tabs or linefeeds are not allowed in each field.
--- a/docs/en/interfaces/grpc.md
+++ b/docs/en/interfaces/grpc.md
@ -0,0 +1,99 @@
+---
+toc_priority: 19
+toc_title: gRPC Interface
+---
+
+# gRPC Interface {#grpc-interface}
+
+## Introduction {#grpc-interface-introduction}
+
+ClickHouse supports the [gRPC](https://grpc.io/) interface. gRPC is an open source remote procedure call system that uses HTTP/2 and [Protocol Buffers](https://en.wikipedia.org/wiki/Protocol_Buffers). The implementation of gRPC in ClickHouse supports:
+
+-   SSL; 
+-   authentication; 
+-   sessions; 
+-   compression; 
+-   parallel queries through the same channel; 
+-   cancellation of queries; 
+-   getting progress and logs; 
+-   external tables.
+
+The specification of the interface is described in [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto).
+
+## gRPC Configuration {#grpc-interface-configuration}
+
+To use the gRPC interface, set `grpc_port` in the main [server configuration](../operations/configuration-files.md). See the following example for other configuration options:
+
+```xml
+<grpc_port>9100</grpc_port>
+    <grpc>
+        <enable_ssl>false</enable_ssl>
+
+        <!-- The following two files are used only if SSL is enabled -->
+        <ssl_cert_file>/path/to/ssl_cert_file</ssl_cert_file>
+        <ssl_key_file>/path/to/ssl_key_file</ssl_key_file>
+
+        <!-- Whether server requests client for a certificate -->
+        <ssl_require_client_auth>false</ssl_require_client_auth>
+
+        <!-- The following file is used only if ssl_require_client_auth=true -->
+        <ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file>
+
+        <!-- Default compression algorithm (applied if client doesn't specify another algorithm, see result_compression in QueryInfo).
+             Supported algorithms: none, deflate, gzip, stream_gzip -->
+        <compression>deflate</compression>
+
+        <!-- Default compression level (applied if client doesn't specify another level, see result_compression in QueryInfo).
+             Supported levels: none, low, medium, high -->
+        <compression_level>medium</compression_level>
+
+        <!-- Send/receive message size limits in bytes. -1 means unlimited -->
+        <max_send_message_size>-1</max_send_message_size>
+        <max_receive_message_size>-1</max_receive_message_size>
+
+        <!-- Enable if you want to get detailed logs -->
+        <verbose_logs>false</verbose_logs>
+    </grpc>
+```
+
+## Built-in Client {#grpc-client}
+
+You can write a client in any of the programming languages supported by gRPC using the provided [specification](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto).
+Alternatively, you can use the built-in Python client. It is located at [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) in the repository. The built-in client requires the [grpcio and grpcio-tools](https://grpc.io/docs/languages/python/quickstart) Python modules.
+
+The client supports the following arguments:
+
+-   `--help` – Shows a help message and exits.
+-   `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can also use IPv4 or IPv6 addresses.
+-   `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). Default value: `9100`.
+-   `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`.
+-   `--password PASSWORD` – A password. Default value: empty string.
+-   `--query QUERY, -q QUERY` – A query to process when using non-interactive mode.
+-   `--database DATABASE, -d DATABASE` – A default database. If not specified, the current database set in the server settings is used (`default` by default).
+-   `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). Default value for interactive mode: `PrettyCompact`.
+-   `--debug` – Enables showing debug information.
+
+To run the client in interactive mode, call it without the `--query` argument.
+
+In batch mode, query data can be passed via `stdin`.
+
+**Client Usage Example**
+
+In the following example a table is created and loaded with data from a CSV file. Then the content of the table is queried.
+
+``` bash
+./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;"
+echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt
+cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV"
+
+./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;"
+```
+
+Result:
+
+``` text
+┌─id─┬─text──────────────────┐
+│  0 │ Input data for        │
+│  1 │ gRPC protocol example │
+└────┴───────────────────────┘
+```
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@ -424,7 +424,10 @@ Next are the configuration methods for different `type`.

 `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration.

-The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
+The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.
+
+!!! note "Warning"
+    To keep the default `handlers` such as `query`, `play`, `ping`, use the `<defaults/>` rule.

 Example:

@ -443,13 +446,14 @@ Example:
            <query>SELECT name, value FROM system.settings WHERE name = {name_2:String}</query>
        </handler>
    </rule>
+    <defaults/>
 </http_handlers>
 ```

 ``` bash
-$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
+$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
 1
-max_alter_threads   2
+max_final_threads   2
 ```

 !!! note "caution"
@ -461,7 +465,7 @@ In `dynamic_query_handler`, the query is written in the form of param of the HTT

 ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.

-To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and `queries` whether the settings were set successfully.
+To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads`, and queries whether the settings were set successfully.

 Example:

@ -475,13 +479,14 @@ Example:
        <query_param_name>query_param</query_param_name>
    </handler>
    </rule>
+    <defaults/>
 </http_handlers>
 ```

 ``` bash
-$ curl  -H 'XXX:TEST_HEADER_VALUE_DYNAMIC'  'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
+$ curl  -H 'XXX:TEST_HEADER_VALUE_DYNAMIC'  'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
 max_threads 1
-max_alter_threads   2
+max_final_threads   2
 ```

 ### static {#static}
@ -505,6 +510,7 @@ Return a message.
                <response_content>Say Hi!</response_content>
            </handler>
        </rule>
+        <defaults/>
 </http_handlers>
 ```

--- a/docs/en/interfaces/index.md
+++ b/docs/en/interfaces/index.md
@ -6,10 +6,11 @@ toc_title: Introduction

 # Interfaces {#interfaces}

-ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security):
+ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security):

 -   [HTTP](http.md), which is documented and easy to use directly.
 -   [Native TCP](../interfaces/tcp.md), which has less overhead.
+-   [gRPC](grpc.md).

 In most cases it is recommended to use appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following:

@ -24,4 +25,3 @@ There are also a wide range of third-party libraries for working with ClickHouse
 -   [Integrations](../interfaces/third-party/integrations.md)
 -   [Visual interfaces](../interfaces/third-party/gui.md)

-[Original article](https://clickhouse.com/docs/en/interfaces/) <!--hide-->
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`Subproject commit ac4b763d4ca40122275f1497cbdc5451337461d9`