Merge branch 'master' into clickhouse-test-unknown

Commit 1daf469799, authored by mergify[bot] on 2021-12-21 10:55:11 +00:00, committed by GitHub.
659 changed files with 16348 additions and 4392 deletions

@@ -8,6 +8,10 @@
 name: Docker Container Scan (clickhouse-server)
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
 "on":
   pull_request:
     paths:
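The `PYTHONUNBUFFERED: 1` being added to every workflow below forces Python's stdout/stderr to flush immediately, so CI logs stream in real time. A quick way to see the difference locally (a sketch, not part of the diff):

```bash
# Piped stdout is block-buffered: all three numbers appear together after ~3 s.
python3 -c 'import time
for i in range(3): print(i); time.sleep(1)' | cat

# With the variable set, each number appears as soon as it is printed.
PYTHONUNBUFFERED=1 python3 -c 'import time
for i in range(3): print(i); time.sleep(1)' | cat
```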

@@ -1,4 +1,9 @@
 name: CherryPick
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
 concurrency:
   group: cherry-pick
 on: # yamllint disable-line rule:truthy
@@ -8,18 +13,24 @@ jobs:
   CherryPick:
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Set envs
+        # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/cherry_pick
+          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
+          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
+          RCSK
+          REPO_OWNER=ClickHouse
+          REPO_NAME=ClickHouse
+          REPO_TEAM=core
+          EOF
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
           token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
           fetch-depth: 0
       - name: Cherry pick
-        env:
-          TEMP_PATH: ${{runner.temp}}/cherry_pick
-          ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
-          REPO_OWNER: "ClickHouse"
-          REPO_NAME: "ClickHouse"
-          REPO_TEAM: "core"
         run: |
           sudo pip install GitPython
           cd $GITHUB_WORKSPACE/tests/ci
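The new "Set envs" step replaces per-step `env:` maps with a single write to `$GITHUB_ENV`, including a nested heredoc (`ROBOT_CLICKHOUSE_SSH_KEY<<RCSK`) for the multiline secret. A standalone sketch of the mechanism, with illustrative names:

```bash
# Quoting 'EOF' stops the shell from expanding anything here;
# GitHub substitutes ${{ ... }} expressions before the script runs.
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=/tmp/demo
MULTILINE_VALUE<<DELIM
first line
second line
DELIM
EOF
# Every later step in the same job then sees $TEMP_PATH and
# $MULTILINE_VALUE as ordinary environment variables.
```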

@@ -1,4 +1,9 @@
 name: BackportPR
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
 on: # yamllint disable-line rule:truthy
   push:
     branches:
@@ -7,6 +12,9 @@ jobs:
   DockerHubPush:
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Images check
@@ -22,17 +30,23 @@ jobs:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/compatibility_check
+          REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          EOF
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Download json reports
         uses: actions/download-artifact@v2
         with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
-        env:
-          TEMP_PATH: ${{runner.temp}}/compatibility_check
-          REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -51,24 +65,30 @@ jobs:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_release
+          EOF
       - name: Download changed images
         uses: actions/download-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
-          submodules: 'recursive'
+          submodules: 'true'
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_release'
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -78,35 +98,41 @@ jobs:
         uses: actions/upload-artifact@v2
         with:
           name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
       - name: Cleanup
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
           docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
   BuilderDebAsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_asan
+          EOF
       - name: Download changed images
         uses: actions/download-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
-          submodules: 'recursive'
+          submodules: 'true'
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_asan'
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -116,35 +142,41 @@ jobs:
         uses: actions/upload-artifact@v2
         with:
           name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
       - name: Cleanup
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
           docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
   BuilderDebTsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_tsan
+          EOF
       - name: Download changed images
         uses: actions/download-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
-          submodules: 'recursive'
+          submodules: 'true'
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_tsan'
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -154,35 +186,41 @@ jobs:
         uses: actions/upload-artifact@v2
         with:
           name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
       - name: Cleanup
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
           docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
   BuilderDebDebug:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_debug
+          EOF
       - name: Download changed images
         uses: actions/download-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/images_path
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
-          submodules: 'recursive'
+          submodules: 'true'
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
-        env:
-          TEMP_PATH: ${{runner.temp}}/build_check
-          IMAGES_PATH: ${{runner.temp}}/images_path
-          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH: ${{runner.temp}}/../ccaches
-          CHECK_NAME: 'ClickHouse build check (actions)'
-          BUILD_NAME: 'package_debug'
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -192,13 +230,13 @@ jobs:
         uses: actions/upload-artifact@v2
         with:
           name: ${{ env.BUILD_NAME }}
-          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
       - name: Cleanup
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
           docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          sudo rm -fr $TEMP_PATH $CACHES_PATH
 ############################################################################################
 ##################################### BUILD REPORTER #######################################
 ############################################################################################
@@ -210,17 +248,23 @@ jobs:
     - BuilderDebDebug
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/report_check
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=ClickHouse build check (actions)
+          EOF
       - name: Download json reports
         uses: actions/download-artifact@v2
         with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Report Builder
-        env:
-          TEMP_PATH: ${{runner.temp}}/report_check
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'ClickHouse build check (actions)'
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -239,19 +283,25 @@ jobs:
     needs: [BuilderDebAsan]
     runs-on: [self-hosted, func-tester]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stateless_debug
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stateless tests (address, actions)
+          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
+          KILL_TIMEOUT=10800
+          EOF
       - name: Download json reports
         uses: actions/download-artifact@v2
         with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
      - name: Functional test
-        env:
-          TEMP_PATH: ${{runner.temp}}/stateless_debug
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Stateless tests (address, actions)'
-          REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
-          KILL_TIMEOUT: 10800
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -271,19 +321,25 @@ jobs:
     needs: [BuilderDebDebug]
     runs-on: [self-hosted, func-tester]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stateful_debug
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stateful tests (debug, actions)
+          REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
+          KILL_TIMEOUT=3600
+          EOF
       - name: Download json reports
         uses: actions/download-artifact@v2
         with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Functional test
-        env:
-          TEMP_PATH: ${{runner.temp}}/stateful_debug
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Stateful tests (debug, actions)'
-          REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
-          KILL_TIMEOUT: 3600
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -301,20 +357,30 @@ jobs:
 ##############################################################################################
   StressTestTsan:
     needs: [BuilderDebTsan]
-    runs-on: [self-hosted, stress-tester]
+    # func testers have 16 cores + 128 GB memory
+    # while stress testers have 36 cores + 72 memory
+    # It would be better to have something like 32 + 128,
+    # but such servers almost unavailable as spot instances.
+    runs-on: [self-hosted, func-tester]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stress_thread
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stress test (thread, actions)
+          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
+          EOF
       - name: Download json reports
         uses: actions/download-artifact@v2
         with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Stress test
-        env:
-          TEMP_PATH: ${{runner.temp}}/stress_thread
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Stress test (thread, actions)'
-          REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -334,18 +400,24 @@ jobs:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, stress-tester]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/integration_tests_release
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Integration tests (release, actions)
+          REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
+          EOF
       - name: Download json reports
         uses: actions/download-artifact@v2
         with:
-          path: ${{runner.temp}}/reports_dir
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Integration test
-        env:
-          TEMP_PATH: ${{runner.temp}}/integration_tests_release
-          REPORTS_PATH: ${{runner.temp}}/reports_dir
-          CHECK_NAME: 'Integration tests (release, actions)'
-          REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse
         run: |
           sudo rm -fr $TEMP_PATH
           mkdir -p $TEMP_PATH
@@ -369,6 +441,9 @@ jobs:
     - CompatibilityCheck
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Finish label
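A condensed sketch of the Cleanup steps repeated throughout this workflow; the `||:` suffix (shorthand for `|| true`) is what keeps the step green when no containers are running:

```bash
# docker kill/rm exit non-zero when handed an empty ID list,
# so the no-op fallback swallows that expected failure.
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
```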

@@ -1,4 +1,9 @@
 name: Cancel
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
 on: # yamllint disable-line rule:truthy
   workflow_run:
     workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]

@@ -1,4 +1,9 @@
 name: DocsCheck
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
 on: # yamllint disable-line rule:truthy
   pull_request:
     types:
@@ -14,6 +19,9 @@ jobs:
   CheckLabels:
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Clear repository
+        run: |
+          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Labels check
@@ -24,6 +32,9 @@ jobs:
     needs: CheckLabels
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Clear repository
+        run: |
+          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Images check
@@ -39,17 +50,23 @@ jobs:
     needs: DockerHubPush
     runs-on: [self-hosted, func-tester]
     steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/docs_check
+          REPO_COPY=${{runner.temp}}/docs_check/ClickHouse
+          EOF
       - name: Download changed images
         uses: actions/download-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/docs_check
+          path: ${{ env.TEMP_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Docs Check
-        env:
-          TEMP_PATH: ${{runner.temp}}/docs_check
-          REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
         run: |
           cp -r $GITHUB_WORKSPACE $TEMP_PATH
           cd $REPO_COPY/tests/ci

[File diff suppressed because it is too large]

[File diff suppressed because it is too large]

@@ -1,4 +1,9 @@
 name: DocsReleaseChecks
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
 concurrency:
   group: master-release
   cancel-in-progress: true
@@ -11,10 +16,15 @@ on: # yamllint disable-line rule:truthy
     - 'website/**'
     - 'benchmark/**'
     - 'docker/**'
+    - '.github/**'
+  workflow_dispatch:
 jobs:
   DockerHubPush:
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Images check
@@ -30,20 +40,31 @@ jobs:
     needs: DockerHubPush
     runs-on: [self-hosted, func-tester]
     steps:
+      - name: Set envs
+        # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/docs_release
+          REPO_COPY=${{runner.temp}}/docs_release/ClickHouse
+          CLOUDFLARE_TOKEN=${{secrets.CLOUDFLARE}}
+          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
+          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
+          RCSK
+          EOF
+      - name: Clear repository
+        run: |
+          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Download changed images
         uses: actions/download-artifact@v2
         with:
           name: changed_images
-          path: ${{runner.temp}}/docs_release
+          path: ${{ env.TEMP_PATH }}
       - name: Docs Release
-        env:
-          TEMP_PATH: ${{runner.temp}}/docs_release
-          REPO_COPY: ${{runner.temp}}/docs_release/ClickHouse
-          CLOUDFLARE_TOKEN: ${{secrets.CLOUDFLARE}}
-          ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
         run: |
+          sudo rm -fr $TEMP_PATH
+          mkdir -p $TEMP_PATH
           cp -r $GITHUB_WORKSPACE $TEMP_PATH
           cd $REPO_COPY/tests/ci
           python3 docs_release.py

[File diff suppressed because it is too large]

.gitmodules (vendored)

@@ -247,3 +247,6 @@
 [submodule "contrib/sysroot"]
 	path = contrib/sysroot
 	url = https://github.com/ClickHouse-Extras/sysroot.git
+[submodule "contrib/azure"]
+	path = contrib/azure
+	url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git

@@ -1,4 +1,4 @@
-### ClickHouse release v21.12, 2021-12-13
+### ClickHouse release v21.12, 2021-12-15
 #### Backward Incompatible Change
@@ -71,8 +71,8 @@
 * Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)).
 * Recreate `system.*_log` tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)).
 * `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)).
-* ClickHouse dictionary source: support named collections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Allow to use named collections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* ClickHouse dictionary source: support predefined connections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow to use predefined connections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)).
 * Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)) (author: Amos Bird).
 * Add key bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)).
 * Improve the `max_execution_time` checks. Fixed some cases when timeout checks do not happen and query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)).

@@ -447,7 +447,7 @@ if (MAKE_STATIC_LIBRARIES)
         # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
         set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
         set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
-        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie")
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie")
     endif ()
 else ()
     set (CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -508,6 +508,7 @@ include (cmake/find/hdfs3.cmake) # uses protobuf
 include (cmake/find/poco.cmake)
 include (cmake/find/curl.cmake)
 include (cmake/find/s3.cmake)
+include (cmake/find/blob_storage.cmake)
 include (cmake/find/base64.cmake)
 include (cmake/find/parquet.cmake)
 include (cmake/find/simdjson.cmake)
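For context on the `-no-pie -Wl,-no-pie` change: the first flag is consumed by the compiler driver (so it stops passing `-pie` at link time), while the second is forwarded verbatim to the linker. A sketch of how the result could be verified (file names are illustrative):

```bash
echo 'int main() { return 0; }' > t.c
clang -fno-pie -no-pie -Wl,-no-pie t.c -o t
readelf -h t | grep Type   # expect "EXEC (Executable file)", not "DYN"
```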

@@ -2,7 +2,13 @@
 ClickHouse is an open project, and you can contribute to it in many ways. You can help with ideas, code, or documentation. We appreciate any efforts that help us to make the project better.
-Thank you.
+Thank you!
+
+## Legal Info
+
+When you open your first pull-request to ClickHouse repo, a bot will invite you to accept ClickHouse Individual CLA (Contributor License Agreement). It is a simple few click process. For subsequent pull-requests the bot will check if you have already signed it and won't bother you again.
+
+Optionally, to make contributions even more tight legally, your employer as a legal entity may want to sign a ClickHouse Corporate CLA with ClickHouse, Inc. If you're interested to do so, contact us at [legal@clickhouse.com](mailto:legal@clickhouse.com).
 ## Technical Info

@@ -82,7 +82,9 @@ PoolWithFailover::PoolWithFailover(
         unsigned default_connections_,
         unsigned max_connections_,
         size_t max_tries_,
-        uint64_t wait_timeout_)
+        uint64_t wait_timeout_,
+        size_t connect_timeout_,
+        size_t rw_timeout_)
     : max_tries(max_tries_)
     , shareable(false)
     , wait_timeout(wait_timeout_)
@@ -93,8 +95,8 @@ PoolWithFailover::PoolWithFailover(
         replicas_by_priority[0].emplace_back(std::make_shared<Pool>(database,
             host, user, password, port,
             /* socket_ = */ "",
-            MYSQLXX_DEFAULT_TIMEOUT,
-            MYSQLXX_DEFAULT_RW_TIMEOUT,
+            connect_timeout_,
+            rw_timeout_,
             default_connections_,
             max_connections_));
     }

@@ -6,6 +6,7 @@
 #define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1
 #define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16
 #define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3
+#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_CONNECTION_WAIT_TIMEOUT 5 /// in seconds
 namespace mysqlxx
@@ -121,7 +122,9 @@ namespace mysqlxx
             unsigned default_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS,
             unsigned max_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS,
             size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES,
-            uint64_t wait_timeout_ = UINT64_MAX);
+            uint64_t wait_timeout_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_CONNECTION_WAIT_TIMEOUT,
+            size_t connect_timeout = MYSQLXX_DEFAULT_TIMEOUT,
+            size_t rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT);
         PoolWithFailover(const PoolWithFailover & other);

@@ -0,0 +1,30 @@
+option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
+
+option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
+    "Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
+    ON)
+
+if (ENABLE_AZURE_BLOB_STORAGE)
+    set(USE_AZURE_BLOB_STORAGE 1)
+    set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
+endif()
+
+if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
+    OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
+    AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
+    set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
+    set(USE_AZURE_BLOB_STORAGE 0)
+endif ()
+
+if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
+endif()
+
+if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
+endif()
+
+if (USE_AZURE_BLOB_STORAGE)
+    message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")
+endif()
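A sketch of how the new option might be toggled at configure time (the build directory is illustrative):

```bash
# Bundled Azure SDK on (the default when ENABLE_LIBRARIES is on):
cmake -S . -B build -DENABLE_AZURE_BLOB_STORAGE=ON

# Or off, skipping the contrib/azure submodule entirely:
cmake -S . -B build -DENABLE_AZURE_BLOB_STORAGE=OFF
```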

@@ -249,6 +249,10 @@ endif()
 # - sentry-native
 add_subdirectory (curl-cmake)
+
+if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
+    add_subdirectory(azure-cmake)
+endif()
 if (USE_SENTRY)
     add_subdirectory (sentry-native-cmake)
 endif()

contrib/NuRaft (vendored)

@@ -1 +1 @@
-Subproject commit d10351f312c1ae1ca3fdda433693dfbef3acfece
+Subproject commit bb69d48e0ee35c87a0f19e509a09a914f71f0cff

contrib/azure (new vendored submodule)

@@ -0,0 +1 @@
+Subproject commit ac4b763d4ca40122275f1497cbdc5451337461d9

@@ -0,0 +1,74 @@
+set(AZURE_DIR "${ClickHouse_SOURCE_DIR}/contrib/azure")
+set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk")
+
+file(GLOB AZURE_SDK_CORE_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.hpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/winhttp/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/io/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/private/*.hpp"
+)
+
+file(GLOB AZURE_SDK_IDENTITY_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/private/*.hpp"
+)
+
+file(GLOB AZURE_SDK_STORAGE_COMMON_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/private/*.cpp"
+)
+
+file(GLOB AZURE_SDK_STORAGE_BLOBS_SRC
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.hpp"
+)
+
+file(GLOB AZURE_SDK_UNIFIED_SRC
+    ${AZURE_SDK_CORE_SRC}
+    ${AZURE_SDK_IDENTITY_SRC}
+    ${AZURE_SDK_STORAGE_COMMON_SRC}
+    ${AZURE_SDK_STORAGE_BLOBS_SRC}
+)
+
+set(AZURE_SDK_INCLUDES
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/inc/"
+    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/inc/"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/inc/"
+    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/inc/"
+)
+
+include("${AZURE_DIR}/cmake-modules/AzureTransportAdapters.cmake")
+
+add_library(azure_sdk ${AZURE_SDK_UNIFIED_SRC})
+
+if (COMPILER_CLANG)
+    target_compile_options(azure_sdk PRIVATE
+        -Wno-deprecated-copy-dtor
+        -Wno-extra-semi
+        -Wno-suggest-destructor-override
+        -Wno-inconsistent-missing-destructor-override
+        -Wno-error=unknown-warning-option
+    )
+
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
+        target_compile_options(azure_sdk PRIVATE -Wno-reserved-identifier)
+    endif()
+endif()
+
+# Originally, on Windows azure-core is built with bcrypt and crypt32 by default
+if (OPENSSL_FOUND)
+    target_link_libraries(azure_sdk PRIVATE ${OPENSSL_LIBRARIES})
+endif()
+
+# Originally, on Windows azure-core is built with winhttp by default
+if (CURL_FOUND)
+    target_link_libraries(azure_sdk PRIVATE ${CURL_LIBRARY})
+endif()
+
+target_link_libraries(azure_sdk PRIVATE ${LIBXML2_LIBRARIES})
+
+target_include_directories(azure_sdk SYSTEM PUBLIC ${AZURE_SDK_INCLUDES})

@@ -639,6 +639,7 @@ add_library(
     "${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c"
     "${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c"
+    "${BORINGSSL_SOURCE_DIR}/decrepit/bio/base64_bio.c"
 )
 add_executable(

@@ -268,7 +268,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
  *
  * Whether iconv support is available
  */
-#if 1
+#if 0
 #define LIBXML_ICONV_ENABLED
 #endif

debian/rules (vendored)

@@ -45,6 +45,10 @@ ifdef DEB_CXX
 ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE))
 CC := $(DEB_CC)
 CXX := $(DEB_CXX)
+else ifeq (clang,$(findstring clang,$(DEB_CXX)))
+# If we crosscompile with clang, it knows what to do
+CC := $(DEB_CC)
+CXX := $(DEB_CXX)
 else
 CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC)
 CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX)
@@ -77,10 +81,6 @@ else
 THREADS_COUNT = 1
 endif
-ifneq ($(THREADS_COUNT),)
-THREADS_COUNT:=-j$(THREADS_COUNT)
-endif
-
 %:
 	dh $@ $(DH_FLAGS) --buildsystem=cmake
@@ -89,11 +89,11 @@ override_dh_auto_configure:
 override_dh_auto_build:
 	# Fix for ninja. Do not add -O.
-	$(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
+	$(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
 override_dh_auto_test:
 ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
-	cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V
+	cd $(BUILDDIR) && ctest -j$(THREADS_COUNT) -V
 endif
 override_dh_clean:
@@ -120,7 +120,7 @@ override_dh_install:
 	dh_install --list-missing --sourcedir=$(DESTDIR)
 override_dh_auto_install:
-	env DESTDIR=$(DESTDIR) $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) install
+	env DESTDIR=$(DESTDIR) $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) install
 override_dh_shlibdeps:
 	true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency.
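The rules change stops baking `-j` into `THREADS_COUNT` and instead spells it out at each call site, so the variable stays a plain number. The resulting pattern, sketched outside of make (the path and target are illustrative):

```bash
THREADS_COUNT=$(nproc)                      # just a number, e.g. 8
make -j"$THREADS_COUNT" -C build my_target  # -j supplied where make is invoked
ctest -j"$THREADS_COUNT" -V                 # ctest reuses the same count
```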

@@ -24,40 +24,34 @@ RUN apt-get update \
     && apt-key add /tmp/llvm-snapshot.gpg.key \
     && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
     && echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
-        /etc/apt/sources.list
+        /etc/apt/sources.list \
+    && apt-get clean
 # initial packages
-RUN apt-get update \
-    && apt-get install \
-    bash \
-    fakeroot \
-    ccache \
-    curl \
-    software-properties-common \
-    --yes --no-install-recommends
 RUN apt-get update \
     && apt-get install \
     bash \
     build-essential \
     ccache \
+    clang-${LLVM_VERSION} \
+    clang-tidy-${LLVM_VERSION} \
     cmake \
     curl \
+    fakeroot \
     gdb \
     git \
     gperf \
-    clang-${LLVM_VERSION} \
-    clang-tidy-${LLVM_VERSION} \
     lld-${LLVM_VERSION} \
     llvm-${LLVM_VERSION} \
     llvm-${LLVM_VERSION}-dev \
+    libicu-dev \
     moreutils \
     ninja-build \
     pigz \
     rename \
+    software-properties-common \
     tzdata \
-    --yes --no-install-recommends
+    --yes --no-install-recommends \
+    && apt-get clean
 # This symlink required by gcc to find lld compiler
 RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
@@ -66,7 +60,7 @@ ENV CC=clang-${LLVM_VERSION}
 ENV CXX=clang++-${LLVM_VERSION}
 # libtapi is required to support .tbh format from recent MacOS SDKs
-RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
+RUN git clone --depth 1 https://github.com/tpoechtrager/apple-libtapi.git \
     && cd apple-libtapi \
     && INSTALLPREFIX=/cctools ./build.sh \
     && ./install.sh \
@@ -74,7 +68,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
     && rm -rf apple-libtapi
 # Build and install tools for cross-linking to Darwin (x86-64)
-RUN git clone https://github.com/tpoechtrager/cctools-port.git \
+RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
     && cd cctools-port/cctools \
     && ./configure --prefix=/cctools --with-libtapi=/cctools \
         --target=x86_64-apple-darwin \
@@ -83,7 +77,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
     && rm -rf cctools-port
 # Build and install tools for cross-linking to Darwin (aarch64)
-RUN git clone https://github.com/tpoechtrager/cctools-port.git \
+RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \
     && cd cctools-port/cctools \
     && ./configure --prefix=/cctools --with-libtapi=/cctools \
         --target=aarch64-apple-darwin \
@@ -97,7 +91,8 @@ RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacO
 # NOTE: Seems like gcc-11 is too new for ubuntu20 repository
 RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
     && apt-get update \
-    && apt-get install gcc-11 g++-11 --yes
+    && apt-get install gcc-11 g++-11 --yes \
+    && apt-get clean
 COPY build.sh /

@@ -64,8 +64,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
     && apt-get install gcc-11 g++-11 --yes
-# This symlink required by gcc to find lld compiler
-RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
+# These symlinks are required:
+# /usr/bin/ld.lld: by gcc to find lld compiler
+# /usr/bin/aarch64-linux-gnu-obj*: for debug symbols stripping
+RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld \
+    && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-strip \
+    && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-objcopy \
+    && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objdump /usr/bin/aarch64-linux-gnu-objdump
 COPY build.sh /

@@ -29,7 +29,13 @@ def pull_image(image_name):
         return False
 def build_image(image_name, filepath):
-    subprocess.check_call("docker build --network=host -t {} -f {} .".format(image_name, filepath), shell=True)
+    context = os.path.dirname(filepath)
+    subprocess.check_call(
+        "docker build --network=host -t {} -f {} {}".format(
+            image_name, filepath, context
+        ),
+        shell=True,
+    )
 def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version):
     env_part = " -e ".join(env_variables)
@@ -90,6 +96,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
     elif is_cross_arm:
         cc = compiler[:-len(ARM_SUFFIX)]
         cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
+        result.append("DEB_ARCH_FLAG=-aarm64")
     elif is_cross_freebsd:
         cc = compiler[:-len(FREEBSD_SUFFIX)]
         cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake")
@@ -98,6 +105,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
         cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake")
     else:
         cc = compiler
+        result.append("DEB_ARCH_FLAG=-aamd64")
     cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++')
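The `build_image` fix passes the Dockerfile's own directory as the build context instead of `.`, so the build no longer depends on the caller's working directory. The equivalent shell invocation (image name and path are illustrative):

```bash
DOCKERFILE=docker/packager/deb/Dockerfile
docker build --network=host -t clickhouse/deb-builder \
    -f "$DOCKERFILE" "$(dirname "$DOCKERFILE")"
```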

@@ -111,19 +111,6 @@ function start_server
     fi
     echo "ClickHouse server pid '$server_pid' started and responded"
-    echo "
-set follow-fork-mode child
-handle all noprint
-handle SIGSEGV stop print
-handle SIGBUS stop print
-handle SIGABRT stop print
-continue
-thread apply all backtrace
-continue
-" > script.gdb
-    gdb -batch -command script.gdb -p "$server_pid" &
 }
 function clone_root
@@ -186,6 +173,8 @@ function clone_submodules
         contrib/dragonbox
         contrib/fast_float
         contrib/NuRaft
+        contrib/jemalloc
+        contrib/replxx
     )
     git submodule sync
@@ -206,6 +195,8 @@ function run_cmake
         "-DENABLE_THINLTO=0"
         "-DUSE_UNWIND=1"
         "-DENABLE_NURAFT=1"
+        "-DENABLE_JEMALLOC=1"
+        "-DENABLE_REPLXX=1"
     )
     # TODO remove this? we don't use ccache anyway. An option would be to download it

@@ -155,17 +155,43 @@ function fuzz
     kill -0 $server_pid
+    # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
+    # and clickhouse-server can do fork-exec, for example, to run some bridge.
+    # Do not set nostop noprint for all signals, because some it may cause gdb to hang,
+    # explicitly ignore non-fatal signals that are used by server.
+    # Number of SIGRTMIN can be determined only in runtime.
+    RTMIN=$(kill -l SIGRTMIN)
     echo "
-set follow-fork-mode child
-handle all noprint
-handle SIGSEGV stop print
-handle SIGBUS stop print
-continue
-thread apply all backtrace
+set follow-fork-mode parent
+handle SIGHUP nostop noprint pass
+handle SIGINT nostop noprint pass
+handle SIGQUIT nostop noprint pass
+handle SIGPIPE nostop noprint pass
+handle SIGTERM nostop noprint pass
+handle SIGUSR1 nostop noprint pass
+handle SIGUSR2 nostop noprint pass
+handle SIG$RTMIN nostop noprint pass
+info signals
 continue
+backtrace full
+info locals
+info registers
+disassemble /s
+up
+info locals
+disassemble /s
+up
+info locals
+disassemble /s
+p \"done\"
+detach
+quit
 " > script.gdb
-    sudo gdb -batch -command script.gdb -p $server_pid &
+    gdb -batch -command script.gdb -p $server_pid &
+    sleep 5
+    # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
+    time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
     # Check connectivity after we attach gdb, because it might cause the server
     # to freeze and the fuzzer will fail.
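The same attach-and-probe pattern, reduced to its essentials (a sketch; the pid variable and probe query are illustrative):

```bash
RTMIN=$(kill -l SIGRTMIN)
cat > script.gdb <<EOF
set follow-fork-mode parent
handle SIGUSR1 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
continue
EOF
gdb -batch -command script.gdb -p "$server_pid" &
# gdb SIGSTOPs the process while loading debug info, so confirm the
# server answers before any timing-sensitive step that follows.
sleep 5
clickhouse-client --query "SELECT 1" ||:
```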

@@ -72,11 +72,13 @@ RUN python3 -m pip install \
     grpcio-tools \
     kafka-python \
     kazoo \
+    lz4 \
     minio \
     protobuf \
     psycopg2-binary==2.8.6 \
     pymongo==3.11.0 \
     pytest \
+    pytest-order==1.0.0 \
     pytest-timeout \
     pytest-xdist \
     pytest-repeat \
@@ -85,7 +87,8 @@ RUN python3 -m pip install \
     tzlocal==2.1 \
     urllib3 \
     requests-kerberos \
-    pyhdfs
+    pyhdfs \
+    azure-storage-blob
 COPY modprobe.sh /usr/local/bin/modprobe
 COPY dockerd-entrypoint.sh /usr/local/bin/

@@ -0,0 +1,13 @@
+version: '2.3'
+
+services:
+  azurite1:
+    image: mcr.microsoft.com/azure-storage/azurite
+    ports:
+      - "10000:10000"
+    volumes:
+      - data1-1:/data1
+    command: azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log
+
+volumes:
+  data1-1:
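A sketch of how the new Azurite service might be brought up and probed locally (the compose file name is illustrative; `devstoreaccount1` is Azurite's well-known development account):

```bash
docker-compose -f docker_compose_azurite.yml up -d
# Wait until the blob endpoint answers on the published port.
for _ in $(seq 1 30); do
    curl -s -o /dev/null "http://127.0.0.1:10000/devstoreaccount1" && break
    sleep 1
done
```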

@@ -8,8 +8,8 @@ echo '{
     "ip-forward": true,
     "log-level": "debug",
     "storage-driver": "overlay2",
-    "insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
-    "registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"]
+    "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
+    "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
 }' | dd of=/etc/docker/daemon.json 2>/dev/null
 dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log &

View File

@ -193,7 +193,7 @@ function run_tests
then then
# Run only explicitly specified tests, if any. # Run only explicitly specified tests, if any.
# shellcheck disable=SC2010 # shellcheck disable=SC2010
test_files=$(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}") test_files=($(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}"))
elif [ "$PR_TO_TEST" -ne 0 ] \ elif [ "$PR_TO_TEST" -ne 0 ] \
&& [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \ && [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \
&& [ "$(wc -l < other-changed-files.txt)" -eq 0 ] && [ "$(wc -l < other-changed-files.txt)" -eq 0 ]
@ -201,10 +201,26 @@ function run_tests
# If only the perf tests were changed in the PR, we will run only these # If only the perf tests were changed in the PR, we will run only these
# tests. The lists of changed files are prepared in entrypoint.sh because # tests. The lists of changed files are prepared in entrypoint.sh because
# it has the repository. # it has the repository.
test_files=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt) test_files=($(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt))
else else
# The default -- run all tests found in the test dir. # The default -- run all tests found in the test dir.
test_files=$(ls "$test_prefix"/*.xml) test_files=($(ls "$test_prefix"/*.xml))
fi
# We split perf tests into multiple checks to make them faster
if [ -v CHPC_TEST_RUN_BY_HASH_TOTAL ]; then
# filter tests array in bash https://stackoverflow.com/a/40375567
for index in "${!test_files[@]}"; do
# sorry for this, just calculating hash(test_name) % total_tests_group == my_test_group_num
test_hash_result=$(echo test_files[$index] | perl -ne 'use Digest::MD5 qw(md5); print unpack('Q', md5($_)) % $ENV{CHPC_TEST_RUN_BY_HASH_TOTAL} == $ENV{CHPC_TEST_RUN_BY_HASH_NUM};')
# BTW, for some reason when hash(test_name) % total_tests_group != my_test_group_num perl outputs nothing, not zero
if [ "$test_hash_result" != "1" ]; then
# deleting element from array
unset -v 'test_files[$index]'
fi
done
# to have sequential indexes...
test_files=("${test_files[@]}")
fi fi
# For PRs w/o changes in test definitons, test only a subset of queries, # For PRs w/o changes in test definitons, test only a subset of queries,
@ -212,21 +228,26 @@ function run_tests
# already set, keep those values. # already set, keep those values.
# #
# NOTE: too high CHPC_RUNS/CHPC_MAX_QUERIES may hit internal CI timeout. # NOTE: too high CHPC_RUNS/CHPC_MAX_QUERIES may hit internal CI timeout.
if [ "$PR_TO_TEST" -ne 0 ] && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ] # NOTE: Currently we disabled complete run even for master branch
then #if [ "$PR_TO_TEST" -ne 0 ] && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ]
#then
# CHPC_RUNS=${CHPC_RUNS:-7}
# CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
#else
# CHPC_RUNS=${CHPC_RUNS:-13}
# CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
#fi
CHPC_RUNS=${CHPC_RUNS:-7} CHPC_RUNS=${CHPC_RUNS:-7}
CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10} CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
else
CHPC_RUNS=${CHPC_RUNS:-13}
CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
fi
export CHPC_RUNS export CHPC_RUNS
export CHPC_MAX_QUERIES export CHPC_MAX_QUERIES
# Determine which concurrent benchmarks to run. For now, the only test # Determine which concurrent benchmarks to run. For now, the only test
# we run as a concurrent benchmark is 'website'. Run it as benchmark if we # we run as a concurrent benchmark is 'website'. Run it as benchmark if we
# are also going to run it as a normal test. # are also going to run it as a normal test.
for test in $test_files; do echo "$test"; done | sed -n '/website/p' > benchmarks-to-run.txt for test in ${test_files[@]}; do echo "$test"; done | sed -n '/website/p' > benchmarks-to-run.txt
# Delete old report files. # Delete old report files.
for x in {test-times,wall-clock-times}.tsv for x in {test-times,wall-clock-times}.tsv
@ -235,8 +256,8 @@ function run_tests
touch "$x" touch "$x"
done done
# Randomize test order. # Randomize test order. BTW, it's not an array no more.
test_files=$(for f in $test_files; do echo "$f"; done | sort -R) test_files=$(for f in ${test_files[@]}; do echo "$f"; done | sort -R)
# Limit profiling time to 10 minutes, not to run for too long. # Limit profiling time to 10 minutes, not to run for too long.
profile_seconds_left=600 profile_seconds_left=600
@@ -261,16 +282,24 @@ function run_tests

        # Use awk because bash doesn't support floating point arithmetic.
        profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }")

        (
            set +x
            argv=(
                --host localhost localhost
                --port "$LEFT_SERVER_PORT" "$RIGHT_SERVER_PORT"
                --runs "$CHPC_RUNS"
                --max-queries "$CHPC_MAX_QUERIES"
                --profile-seconds "$profile_seconds"
                "$test"
            )
            TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
            # one more subshell to suppress trace output for "set +x"
            (
                time "$script_dir/perf.py" "${argv[@]}" > "$test_name-raw.tsv" 2> "$test_name-err.log"
            ) 2>>wall-clock-times.tsv >/dev/null \
                || echo "Test $test_name failed with error code $?" >> "$test_name-err.log"
        ) 2>/dev/null

        profile_seconds_left=$(awk -F' ' \
            'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \

@@ -278,8 +307,6 @@ function run_tests

        current_test=$((current_test + 1))
    done

    wait
}
@@ -518,7 +545,9 @@ unset IFS

# all nodes.
numactl --show
numactl --cpunodebind=all --membind=all numactl --show

# Use fewer jobs to avoid OOM. Some queries can consume 8+ GB of memory.
jobs_count=$(($(grep -c ^processor /proc/cpuinfo) / 3))
numactl --cpunodebind=all --membind=all parallel --jobs $jobs_count --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log

clickhouse-local --query "
-- Join the metric names back to the metric statistics we've calculated, and make
View File
@@ -16,16 +16,28 @@ right_sha=$4

datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}

declare -A dataset_paths

if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
    dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
    dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
    dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
    dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
else
    dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
    dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
    dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
    dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
fi

function download
{
    # Historically there were various paths for the performance test package.
    # Test all of them.
    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz"
        "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz"
    )
    for path in "${urls_to_try[@]}"
    do
        if curl --fail --head "$path"
        then
View File
@@ -4,6 +4,27 @@ set -ex

CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
export CHPC_CHECK_START_TIMESTAMP

S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"}
COMMON_BUILD_PREFIX="/clickhouse_build_check"
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
    COMMON_BUILD_PREFIX=""
fi

# Sometimes AWS responds with a DNS error, and it's impossible to retry it with
# the current curl version's options.
function curl_with_retry
{
    for _ in 1 2 3 4; do
        if curl --fail --head "$1"; then
            return 0
        else
            sleep 0.5
        fi
    done
    return 1
}

# Use the packaged repository to find the revision we will compare to.
function find_reference_sha
{

@@ -43,9 +64,12 @@ function find_reference_sha

    # Historically there were various paths for the performance test package;
    # test all of them.
    unset found
    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz"
        "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz"
    )
    for path in "${urls_to_try[@]}"
    do
        if curl_with_retry "$path"
        then
            found="$path"
            break

@@ -65,14 +89,11 @@ chmod 777 workspace output

cd workspace

# Download the package for the version we are going to test.
if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
then
    right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
fi

mkdir right
wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv
View File
@@ -45,6 +45,7 @@ parser.add_argument('--runs', type=int, default=1, help='Number of query runs pe

parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')

@@ -284,7 +285,7 @@ for query_index in queries_to_run:

            # it makes the results unstable.
            res = c.execute(q, query_id = prewarm_id,
                settings = {
                    'max_execution_time': args.prewarm_max_query_seconds,
                    'query_profiler_real_time_period_ns': 10000000,
                    'memory_profiler_step': '4Mi',
                })
View File
@@ -128,14 +128,35 @@ function start()

    counter=$((counter + 1))
done

# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to the watchdog,
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because for some of them it may cause gdb to hang;
# explicitly ignore non-fatal signals that are used by the server.
# The number of SIGRTMIN can be determined only at runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
handle SIGQUIT nostop noprint pass
handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
backtrace full
info locals
info registers
disassemble /s
up
info locals
disassemble /s
up
info locals
disassemble /s
p \"done\"
detach
quit
" > script.gdb

@@ -143,7 +164,10 @@ quit

# FIXME Hung check may work incorrectly because of attached gdb
# 1. False positives are possible
# 2. We cannot attach another gdb to get stacktraces if some queries hung
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info, and then send SIGCONT; wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
}

configure

@@ -214,6 +238,9 @@ zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/n

zgrep -Fa "########################################" /test_output/* > /dev/null \
    && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv

zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
    && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv

# Put logs into /test_output/
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
do
View File
@@ -5,8 +5,8 @@ echo "Configure to use Yandex dockerhub-proxy"

mkdir -p /etc/docker/
cat > /etc/docker/daemon.json << EOF
{
    "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"],
    "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
}
EOF
View File
@@ -106,20 +106,20 @@ Build ClickHouse. Run ClickHouse from the terminal: change directory to `program

Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`.

Alternatively you can install the ClickHouse package: either a stable release from the ClickHouse repository, or a package you build yourself with `./release` in the ClickHouse sources root. Then start the server with `sudo clickhouse start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`.

When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary:

``` bash
$ sudo clickhouse stop
$ sudo cp ./clickhouse /usr/bin/
$ sudo clickhouse start
```

Also you can stop the system clickhouse-server and run your own with the same configuration but with logging to the terminal:

``` bash
$ sudo clickhouse stop
$ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```

@@ -257,9 +257,9 @@ There are five variants (Debug, ASan, TSan, MSan, UBSan).

Thread Fuzzer (please don't mix it up with Thread Sanitizer) is another kind of fuzzing that randomizes the order of thread execution. It helps to find even more special cases.

## Security Audit

People from the Yandex Security Team did some basic overview of ClickHouse capabilities from the security standpoint.

## Static Analyzers {#static-analyzers}

@@ -326,15 +326,11 @@ There is automated check for flaky tests. It runs all new tests 100 times (for f

## Testflows

[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse.

## Test Automation {#test-automation}

We run tests with [GitHub Actions](https://github.com/features/actions).

Build jobs and tests are run in Sandbox on a per-commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.
View File
@@ -17,6 +17,7 @@ ClickHouse server works as MySQL replica. It reads binlog and performs DDL and D

``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
[TABLE OVERRIDE table1 (...), TABLE OVERRIDE table2 (...)]
```

**Engine Parameters**
@@ -109,15 +110,19 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([

- MySQL `DELETE` query is converted into `INSERT` with `_sign=-1`.
- MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1` if the primary key has been changed, or `INSERT` with `_sign=1` if not.

### Selecting from MaterializedMySQL Tables {#select}

`SELECT` queries from `MaterializedMySQL` tables have some specifics (see the sketch below):

- If `_version` is not specified in the `SELECT` query, the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used, so only rows with `MAX(_version)` are returned for each primary key value.
- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default, so the deleted rows are not included into the result set.
- The result includes column comments if they exist in the MySQL database tables.
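A minimal sketch of these defaults, assuming a `MaterializedMySQL` database named `mysql_db` with a table `t` (both names are illustrative):

```bash
# Implicit form: FINAL and WHERE _sign = 1 are applied automatically,
# so only the latest non-deleted row versions come back.
clickhouse-client --query "SELECT * FROM mysql_db.t"

# Referencing _version and _sign explicitly disables those defaults,
# making all row versions, including deleted ones, visible.
clickhouse-client --query "SELECT *, _version, _sign FROM mysql_db.t"
```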
@@ -125,15 +130,77 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([

MySQL `PRIMARY KEY` and `INDEX` clauses are converted into `ORDER BY` tuples in ClickHouse tables.

ClickHouse has only one physical order, which is determined by the `ORDER BY` clause. To create a new physical order, use [materialized views](../../sql-reference/statements/create/view.md#materialized).

**Notes**

- Rows with `_sign=-1` are not deleted physically from the tables.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine, as they are not visible in the MySQL binlog.
- Replication can be easily broken.
- Manual operations on database and tables are forbidden.
- `MaterializedMySQL` is affected by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. Data is merged in the corresponding table in the `MaterializedMySQL` database when a table in the MySQL server changes.
### Table Overrides {#table-overrides}
Table overrides can be used to customize the ClickHouse DDL queries, allowing you to make schema optimizations for your
application. This is especially useful for controlling partitioning, which is important for the overall performance of
MaterializedMySQL.
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
[SETTINGS ...]
[TABLE OVERRIDE table_name (
[COLUMNS (
[name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], ...]
[INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, ...]
[PROJECTION projection_name_1 (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]), ...]
)]
[ORDER BY expr]
[PRIMARY KEY expr]
[PARTITION BY expr]
[SAMPLE BY expr]
[TTL expr]
), ...]
```
Example:
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
TABLE OVERRIDE table1 (
COLUMNS (
userid UUID,
category LowCardinality(String),
timestamp DateTime CODEC(Delta, Default)
)
PARTITION BY toYear(timestamp)
),
TABLE OVERRIDE table2 (
COLUMNS (
ip_hash UInt32 MATERIALIZED xxHash32(client_ip),
client_ip String TTL created + INTERVAL 72 HOUR
)
SAMPLE BY ip_hash
)
```
The `COLUMNS` list is sparse; it contains only modified or extra (MATERIALIZED or ALIAS) columns. Modified columns with
a different type must be assignable from the original type. There is currently no validation of this or similar issues
when the `CREATE DATABASE` query executes, so extra care needs to be taken.
You may specify overrides for tables that do not exist yet.
!!! note "Warning"
It is easy to break replication with TABLE OVERRIDEs if not used with care. For example:
* If a column is added with a table override, but then later added to the source MySQL table, the converted ALTER TABLE
query in ClickHouse will fail because the column already exists.
* It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in
`ORDER BY` or `PARTITION BY`.
## Examples of Use {#examples-of-use}
View File
@@ -5,8 +5,7 @@ toc_title: HDFS

# HDFS {#table_engines-hdfs}

This engine provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) to be managed via ClickHouse. This engine is similar to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.

## Usage {#usage}

@@ -14,12 +13,13 @@ to the [File](../../../engines/table-engines/special/file.md#table_engines-file)

ENGINE = HDFS(URI, format)
```

**Engine Parameters**

- `URI` - the whole file URI in HDFS. The path part of `URI` may contain globs; in this case the table would be read-only.
- `format` - specifies one of the available file formats. To perform `SELECT` queries, the format must be supported for input, and to perform `INSERT` queries, for output. The available formats are listed in the [Formats](../../../interfaces/formats.md#formats) section.

**Example:**
@@ -71,12 +71,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table

1. Suppose we have several files in TSV format with the following URIs on HDFS:

    - 'hdfs://hdfs1:9000/some_dir/some_file_1'
    - 'hdfs://hdfs1:9000/some_dir/some_file_2'
    - 'hdfs://hdfs1:9000/some_dir/some_file_3'
    - 'hdfs://hdfs1:9000/another_dir/some_file_1'
    - 'hdfs://hdfs1:9000/another_dir/some_file_2'
    - 'hdfs://hdfs1:9000/another_dir/some_file_3'

1. There are several ways to make a table consisting of all six files, for example:
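    One hedged sketch uses glob patterns over both directories and file numbers (the column names are illustrative):

    ```bash
    # {some,another}_dir matches both directories, some_file_{1..3} the three files in each.
    clickhouse-client --query "
        CREATE TABLE hdfs_table (name String, value UInt32)
        ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV')"
    ```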
@@ -132,6 +132,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us

| **parameter** | **default value** |
| - | - |
| rpc\_client\_connect\_tcpnodelay | true |
| dfs\_client\_read\_shortcircuit | true |
| output\_replace-datanode-on-failure | true |
@@ -181,25 +182,26 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us

#### ClickHouse extras {#clickhouse-extras}

| **parameter** | **default value** |
| - | - |
|hadoop\_kerberos\_keytab | "" |
|hadoop\_kerberos\_principal | "" |
|hadoop\_kerberos\_kinit\_command | kinit |
|libhdfs3\_conf | "" |

### Limitations {#limitations}

* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be global only, not user specific

## Kerberos support {#kerberos-support}

If the `hadoop_security_authentication` parameter has the value `kerberos`, ClickHouse authenticates via Kerberos.
The parameters described [here](#clickhouse-extras), as well as `hadoop_security_kerberos_ticket_cache_path`, may be of help.
Note that due to libhdfs3 limitations, only the old-fashioned approach is supported: datanode communications are not secured by SASL (`HADOOP_SECURE_DN_USER` is a reliable indicator of this security approach). Use `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` for reference.

If `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` or `hadoop_kerberos_kinit_command` is specified, `kinit` will be invoked. `hadoop_kerberos_keytab` and `hadoop_kerberos_principal` are mandatory in this case. The `kinit` tool and krb5 configuration files are required.

## HDFS Namenode HA support {#namenode-ha}

libhdfs3 supports HDFS namenode HA.
View File
@@ -37,6 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

[rabbitmq_skip_broken_messages = N,]
[rabbitmq_max_block_size = N,]
[rabbitmq_flush_interval_ms = N,]
[rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish']
```

Required parameters:
@@ -59,6 +60,7 @@ Optional parameters:

- `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
- `rabbitmq_max_block_size`
- `rabbitmq_flush_interval_ms`
- `rabbitmq_queue_settings_list` - allows setting RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue.
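A minimal sketch of a table that uses this setting; the host, exchange, and table names are illustrative assumptions:

```bash
# Declares the queue with a length limit and a dead-letter exchange.
clickhouse-client --query "
    CREATE TABLE rabbit_events (payload String)
    ENGINE = RabbitMQ
    SETTINGS rabbitmq_host_port = 'rabbitmq:5672',
             rabbitmq_exchange_name = 'events-exchange',
             rabbitmq_format = 'JSONEachRow',
             rabbitmq_queue_settings_list = 'x-max-length=10,x-dead-letter-exchange=my-dlx'"
```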
SSL connection:
View File
@@ -66,9 +66,9 @@ WHERE table = 'visits'

└───────────┴────────────────┴────────┘
```

The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries.

The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query.

Let's break down the name of the first part: `201901_1_3_1`:
View File
@@ -8,24 +8,43 @@ toc_title: Distributed

Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.
Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.

## Creating a Table {#distributed-creating-a-table}

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
    ...
) ENGINE = Distributed(cluster, database, table[, sharding_key[, policy_name]])
[SETTINGS name=value, ...]
```

### From a Table {#distributed-from-a-table}

When the `Distributed` table is pointing to a table on the current server you can adopt that table's schema:

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 ENGINE = Distributed(cluster, database, table[, sharding_key[, policy_name]]) [SETTINGS name=value, ...]
```

**Distributed Parameters**

- `cluster` - the cluster name in the server's config file
- `database` - the name of a remote database
- `table` - the name of a remote table
- `sharding_key` - (optional) sharding key
- `policy_name` - (optional) policy name; it will be used to store temporary files for async send

**See Also**

- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples

**Distributed Settings**

- `fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk.
@@ -59,24 +78,25 @@ Also, it accepts the following settings:

- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting
- `bytes_to_throw_insert` is handled before `bytes_to_delay_insert`, so you should not set it to a value less than `bytes_to_delay_insert`

**Example**

``` sql
CREATE TABLE hits_all AS hits
ENGINE = Distributed(logs, default, hits[, sharding_key[, policy_name]])
SETTINGS
    fsync_after_insert=0,
    fsync_directories=0;
```

Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster.
Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.

Instead of the database name, you can use a constant expression that returns a string. For example: `currentDatabase()`.
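A hedged illustration of that flow, reusing the `hits_all` table from the example above (the `CounterID` column is an assumption):

```bash
# Partial aggregates are computed on every shard of the logs cluster;
# the initiator only merges the intermediate states.
clickhouse-client --query "
    SELECT CounterID, count() AS hits
    FROM hits_all
    GROUP BY CounterID
    ORDER BY hits DESC
    LIMIT 10"
```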
## Clusters {#distributed-clusters}

Clusters are configured in the [server configuration file](../../../operations/configuration-files.md):

``` xml
<remote_servers>
@@ -132,12 +152,13 @@ Replicas are duplicating servers (in order to read all the data, you can access

Cluster names must not contain dots.

The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:

- `host` – the address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server does not start. If you change the DNS record, restart the server.
- `port` – the TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Not to be confused with `http_port`.
- `user` – the name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` – the password for connecting to a remote server (not masked). Default value: empty string.
- `secure` – whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and be configured with correct certificates.
- `compression` – use data compression. Default value: `true`.

When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access); see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times.
@@ -149,40 +170,42 @@ You can specify as many clusters as you wish in the configuration.

To view your clusters, use the `system.clusters` table.

The `Distributed` engine allows working with a cluster like a local server. However, the cluster's configuration cannot be specified dynamically, it has to be configured in the server config file. Usually, all servers in a cluster will have the same cluster config (though this is not required). Clusters from the config file are updated on the fly, without restarting the server.

If you need to send a query to an unknown set of shards and replicas each time, you do not need to create a `Distributed` table; use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).

## Writing data {#distributed-writing-data}

There are two methods for writing data to a cluster:

First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform direct `INSERT` statements on the remote tables in the cluster that the `Distributed` table is pointing to. This is the most flexible solution as you can use any sharding scheme, even one that is non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.

Second, you can perform `INSERT` statements on a `Distributed` table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a `Distributed` table, it must have the `sharding_key` parameter configured (except if there is only one shard).

Each shard can have a `<weight>` defined in the config file. By default, the weight is `1`. Data is distributed across shards in the amount proportional to the shard weight. All shard weights are summed up, then each shard's weight is divided by the total to determine each shard's proportion. For example, if there are two shards and the first has a weight of 1 while the second has a weight of 2, the first will be sent one third (1 / 3) of inserted rows and the second will be sent two thirds (2 / 3).

Each shard can have the `internal_replication` parameter defined in the config file. If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this if the tables underlying the `Distributed` table are replicated tables (e.g. any of the `Replicated*MergeTree` table engines). One of the table replicas will receive the write and it will be replicated to the other replicas automatically.

If `internal_replication` is set to `false` (the default), data is written to all replicas. In this case, the `Distributed` table replicates data itself. This is worse than using replicated tables because the consistency of replicas is not checked and, over time, they will contain slightly different data.

To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weights` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
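A small sketch of that remainder rule, using the 9-and-10 weights from the example above (the key value is made up):

```bash
# Weights of the two shards described above.
w1=9; w2=10
total=$((w1 + w2))   # 19
key=12345            # hypothetical value of the sharding expression for a row
rem=$((key % total)) # 12345 % 19 = 14
if (( rem < w1 )); then
    echo "row goes to shard 1: remainder $rem is in [0, $w1)"
else
    echo "row goes to shard 2: remainder $rem is in [$w1, $total)"
fi
```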
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running `IN` and `JOIN` by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function, e.g. `intHash64(UserID)`.

A simple remainder from the division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area rather than using entries in `Distributed` tables.

You should be concerned about the sharding scheme in the following cases:

- Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries.

Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
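A hedged way to perform that check from the shell, with `database` and `table` standing in for real names:

```bash
# Files still queued for background sending appear as .bin files
# in per-shard subdirectories of the Distributed table's data path.
ls -lR /var/lib/clickhouse/data/database/table/
```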
If the server ceased to exist or had a rough restart (for example, due to a hardware failure) after an `INSERT` to a `Distributed` table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.

## Reading data {#distributed-reading-data}

When querying a `Distributed` table, `SELECT` queries are sent to all shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight; the data will be distributed slightly unevenly, but queries will work correctly and efficiently.

When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
View File
@@ -204,7 +204,7 @@ When parsing with this format, tabs or linefeeds are not allowed in each field.

This format is also available under the name `TSVRawWithNames`.

## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes}

Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping.
When parsing with this format, tabs or linefeeds are not allowed in each field.
View File
@@ -178,5 +178,9 @@ toc_title: Adopters

| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -16,6 +16,11 @@ ZooKeeper is one of the first well-known open-source coordination systems. It's
By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have a format incompatible with ZooKeeper, but the `clickhouse-keeper-converter` tool allows converting ZooKeeper data to a ClickHouse Keeper snapshot. The interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper, so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible.
ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. The digest authentication scheme uses the pair `username:password`; the password is encoded in Base64.
!!! info "Note"
External integrations are not supported.
## Configuration
ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server; in both cases the configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
@ -118,13 +123,13 @@ echo mntr | nc localhost 9181
Below are the detailed 4lw commands:

- `ruok`: Tests if server is running in a non-error state. The server will respond with imok if it is running. Otherwise it will not respond at all. A response of "imok" does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state with respect to quorum and client connection information.

```
imok
```

- `mntr`: Outputs a list of variables that could be used for monitoring the health of the cluster.

```
zk_version v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
@ -146,12 +151,11 @@ zk_followers 0
zk_synced_followers 0
```
- `srvr`: Lists full details for the server.

```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Latency min/avg/max: 0/0/0
Received: 2
Sent : 2
Connections: 1
@ -161,16 +165,14 @@ Mode: leader
Node count: 4
```

- `stat`: Lists brief details for the server and connected clients.

```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Clients:
192.168.1.1:52852(recved=0,sent=0)
192.168.1.1:52042(recved=24,sent=48)
Latency min/avg/max: 0/0/0
Received: 4
Sent : 4
Connections: 1
@ -178,16 +180,15 @@ Outstanding: 0
Zxid: 36
Mode: leader
Node count: 4
```

- `srst`: Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`.

```
Server stats reset.
```

- `conf`: Print details about serving configuration.

```
server_id=1
@ -220,20 +221,20 @@ compress_snapshots_with_zstd_format=true
configuration_change_tries_count=20
```

- `cons`: List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc.

```
192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0)
192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0)
```

- `crst`: Reset connection/session statistics for all connections.

```
Connection stats reset.
```

- `envi`: Print details about the serving environment.

```
Environment:
@ -250,41 +251,41 @@ user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/
```

- `dirs`: Shows the total size of snapshot and log files in bytes.

```
snapshot_dir_size: 0
log_dir_size: 3875
```

- `isro`: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode.

```
rw
```

- `wchs`: Lists brief information on watches for the server.

```
1 connections watching 1 paths
Total watches:1
```

- `wchc`: Lists detailed information on watches for the server, by session. This outputs a list of sessions (connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (i.e. impact server performance), use it carefully.

```
0x0000000000000001
/clickhouse/task_queue/ddl
```

- `wchp`: Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (i.e. impact server performance), use it carefully.

```
/clickhouse/task_queue/ddl
0x0000000000000001
```

- `dump`: Lists the outstanding sessions and ephemeral nodes. This only works on the leader.

```
Sessions dump (2):

View File

@ -505,7 +505,7 @@ Keys:
- `level` – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`.
- `log` – The log file. Contains all the entries according to `level`.
- `errorlog` – Error log file.
- `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
- `count` – The number of archived log files that ClickHouse stores.

**Example**
@ -750,9 +750,13 @@ The value 0 means that you can delete all tables without any restrictions.
## max_thread_pool_size {#max-thread-pool-size}

ClickHouse uses threads from the Global Thread pool to process queries. If there is no idle thread to process a query, then a new thread is created in the pool. `max_thread_pool_size` limits the maximum number of threads in the pool.

Possible values:
- Positive integer.
Default value: `10000`.
**Example**
@ -762,9 +766,13 @@ Default value: 10000.
## max_thread_pool_free_size {#max-thread-pool-free-size}

If the number of **idle** threads in the Global Thread pool is greater than `max_thread_pool_free_size`, then ClickHouse releases resources occupied by some threads and the pool size is decreased. Threads can be created again if necessary.

Possible values:
- Positive integer.
Default value: `1000`.
**Example**
@ -774,9 +782,13 @@ Default value: 1000.
## thread_pool_queue_size {#thread-pool-queue-size}

The maximum number of jobs that can be scheduled on the Global Thread pool. Increasing queue size leads to larger memory usage. It is recommended to keep this value equal to [max_thread_pool_size](#max-thread-pool-size).

Possible values:
- Positive integer.
Default value: `10000`.
**Example**
@ -1443,7 +1455,7 @@ You can also define sections `memory` — means storing information only in memo
To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with the following parameters:

- `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty.
- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect.
**Example**
@ -1507,3 +1519,4 @@ Possible values:
- Positive integer.

Default value: `10000`.

View File

@ -1687,18 +1687,17 @@ Quorum writes
`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written.
When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#settings-select_sequential_consistency).

ClickHouse generates an exception:

- If the number of available replicas at the time of the query is less than the `insert_quorum`.
- When `insert_quorum_parallel` is disabled and an attempt to write data is made when the previous block has not yet been inserted in `insert_quorum` of replicas. This situation may occur if the user tries to perform another `INSERT` query to the same table before the previous one with `insert_quorum` is completed.

See also:

- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_timeout {#settings-insert_quorum_timeout}
@ -1710,11 +1709,29 @@ Default value: 600 000 milliseconds (ten minutes).
See also:

- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_parallel {#settings-insert_quorum_parallel}
Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1.
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
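A combined sketch of the write path (the table name is hypothetical; quorum settings apply only to replicated tables):

``` sql
-- Each INSERT must be acknowledged by 2 replicas within insert_quorum_timeout;
-- with parallelism disabled, the INSERT sequence is linearized and a concurrent
-- quorum INSERT into the same table is rejected.
SET insert_quorum = 2;
SET insert_quorum_parallel = 0;

INSERT INTO replicated_table VALUES (1, 'a');
```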
## select_sequential_consistency {#settings-select_sequential_consistency}

Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default).
Possible values:
@ -1727,10 +1744,13 @@ Usage
When sequential consistency is enabled, ClickHouse allows the client to execute the `SELECT` query only for those replicas that contain data from all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse will generate an exception. The `SELECT` query will not include data that has not yet been written to the quorum of replicas.
When `insert_quorum_parallel` is enabled (the default), then `select_sequential_consistency` does not work. This is because parallel `INSERT` queries can be written to different sets of quorum replicas so there is no guarantee a single replica will have received all writes.
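The read side, as a sketch under the same assumptions (hypothetical table name, `insert_quorum_parallel = 0` on the write path):

``` sql
-- Fail instead of silently reading a replica that has not yet
-- received all previous quorum INSERTs.
SET select_sequential_consistency = 1;

SELECT count() FROM replicated_table;
```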
See also:

- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
## insert_deduplicate {#settings-insert-deduplicate}

View File

@ -41,7 +41,7 @@ Example of a polygon dictionary configuration:
</dictionary>
```

The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):

``` sql
CREATE DICTIONARY polygon_dict_name (
key Array(Array(Array(Array(Float64)))),

View File

@ -31,7 +31,7 @@ CREATE ROLE accountant;
GRANT SELECT ON db.* TO accountant;
```

This sequence of queries creates the role `accountant` that has the privilege of reading data from the `db` database.

Assigning the role to the user `mira`:

View File

@ -22,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = engine
```

Creates a table named `table_name` in the `db` database, or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.

The structure of the table is a list of column descriptions, secondary indexes and constraints. If [primary key](#primary-key) is supported by the engine, it will be indicated as a parameter for the table engine.

A column description is `name type` in the simplest case. Example: `RegionID UInt32`.

View File

@ -298,13 +298,16 @@ Note that elements emitted by a late firing should be treated as updated results
### Monitoring New Windows {#window-view-monitoring}

Window view supports the [WATCH](../../../sql-reference/statements/watch.md) query to monitor changes, or the `TO` syntax to output the results to a table.

``` sql
WATCH [db.]window_view
[EVENTS]
[LIMIT n]
[FORMAT format]
```

The `WATCH` query acts similarly to `LIVE VIEW`. A `LIMIT` can be specified to set the number of updates to receive before terminating the query. The `EVENTS` clause can be used to obtain a short form of the `WATCH` query, where instead of the query result you will just get the latest query watermark.
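A minimal sketch (the window view name `wv` is hypothetical):

``` sql
-- Receive only the watermark after each fired window; stop after two updates.
WATCH wv EVENTS LIMIT 2;
```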
### Settings {#window-view-settings}

View File

@ -206,6 +206,9 @@ This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` forma
- In `Pretty*` formats, the row is output as a separate table after the main result.
- In the other formats it is not available.
!!! note "Note"
totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`.
`WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having.md) is present. The behavior depends on the `totals_mode` setting.
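A short sketch (the table `t` and the threshold are illustrative):

``` sql
-- The extra totals row aggregates across groups; with HAVING present,
-- totals_mode controls which rows it accounts for.
SELECT year, count() AS c
FROM t
GROUP BY year WITH TOTALS
HAVING c > 10;
```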
### Configuring Totals Processing {#configuring-totals-processing}

View File

@ -129,6 +129,9 @@ world
Each element of a [Nested](../sql-reference/data-types/nested-data-structures/nested.md) structure is represented as a separate array.

Incoming `ENUM` values can be passed as the values themselves or as ordinal numbers. The passed value is first matched against the enumeration elements; if no match is found and the value is a number, it is treated as an ordinal number within the enumeration.
If the incoming `ENUM` data contains only ordinal numbers, it is recommended to enable the [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) setting to speed up parsing.

For example:
``` sql
@ -362,6 +365,9 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
If the [input_format_defaults_for_omitted_fields = 1](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting is enabled and the column type is not `Nullable(T)`, empty unquoted values are replaced with the default values for the column data type.

Incoming `ENUM` values can be passed as the values themselves or as ordinal numbers. The passed value is first matched against the enumeration elements; if no match is found and the value is a number, it is treated as an ordinal number within the enumeration.
If the incoming `ENUM` data contains only ordinal numbers, it is recommended to enable the [input_format_csv_enum_as_number](../operations/settings/settings.md#settings-input_format_csv_enum_as_number) setting to speed up parsing.

The CSV format supports the output of totals and extremes the same way as `TabSeparated`.

## CSVWithNames {#csvwithnames}
@ -693,7 +699,7 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 1`, the default value for `x` is `0`, and the default value for `a` is `x * 2`.

!!! note "Warning"
    When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computing resources than with `input_format_defaults_for_omitted_fields = 0`.

### Selecting Data {#vyborka-dannykh}

View File

@ -16,12 +16,17 @@ ZooKeeper — один из первых широко известных сер
By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, sequentially consistent reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have a format incompatible with ZooKeeper, but ZooKeeper data can be converted to a ClickHouse Keeper snapshot with `clickhouse-keeper-converter`. The interserver protocol of ClickHouse Keeper is also incompatible with ZooKeeper, so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible.

ClickHouse Keeper implements Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. The digest scheme uses a `username:password` pair for authentication; the password is encoded in Base64.

!!! info "Note"
    External integrations are not supported.

## Configuration

ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server; in both cases the configuration is an `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Configuration parameters:

- `tcp_port` — port for client connections (default for ZooKeeper: `2181`).
- `tcp_port_secure` — secure port for an SSL connection between client and server.
- `server_id` — unique server id; each participant of the cluster must have a unique number (1, 2, 3, and so on).
- `log_storage_path` — path to coordination logs; as with ZooKeeper, it is best to store them on a device that is not busy.
- `snapshot_storage_path` — path to coordination snapshots.
@ -50,7 +55,11 @@ ClickHouse Keeper может использоваться как равноце
- `shutdown_timeout` — time to wait for internal connections to finish and for the server to shut down, in milliseconds (default: 5000).
- `startup_timeout` — time after which the server shuts down if it does not connect to other quorum participants, in milliseconds (default: 30000).

The quorum configuration is located in `<keeper_server>.<raft_configuration>` and contains a description of the servers.

The only parameter for the whole quorum is `secure`, which enables an encrypted connection between quorum participants. Set it to `true` if SSL connections are required for internal communication between the nodes; otherwise leave it unspecified.

Parameters for each `<server>`:

- `id` — server id in the quorum.
- `hostname` — name of the host where the server is located.

View File

@ -52,7 +52,7 @@ ClickHouse перезагружает встроенные словари с з
ClickHouse checks the `min_part_size` and `min_part_size_ratio` conditions and executes the `case` blocks whose conditions are met.

- If a data part matches the conditions, ClickHouse uses the specified compression method.
- If a data part matches several `case` blocks, ClickHouse uses the first matching block of conditions.

If no `<case>` matches, ClickHouse applies the `lz4` compression algorithm.
@ -554,13 +554,13 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Keys:

- `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports.
- `endpoint` – You can override the URL to which crash reports are sent and use your own Sentry installation. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) URL syntax.
- `anonymize` - Disable sending the server hostname in the crash report.
- `http_proxy` - HTTP proxy configuration for sending crash reports.
- `debug` - Put the Sentry client library into debug mode.
- `tmp_path` - Filesystem path for temporary storage of crash report state before sending it to the Sentry server.

**Recommended settings**
``` xml
<send_crash_reports>
@ -751,9 +751,13 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
## max_thread_pool_size {#max-thread-pool-size}

The maximum number of threads in the Global Thread pool. ClickHouse uses threads from this pool to process queries. If there is no free thread in the pool, a new one is created. `max_thread_pool_size` limits the maximum number of threads in the pool.

Possible values:

- Positive integer.

Default value: `10000`.

**Example**
@ -761,6 +765,38 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<max_thread_pool_size>12000</max_thread_pool_size>
```
## max_thread_pool_free_size {#max-thread-pool-free-size}
If the number of **idle** threads in the Global Thread pool exceeds `max_thread_pool_free_size`, ClickHouse releases the resources occupied by some of the threads, and the pool size decreases. Threads are created again when necessary.

Possible values:

- Positive integer.

Default value: `1000`.

**Example**
``` xml
<max_thread_pool_free_size>1200</max_thread_pool_free_size>
```
## thread_pool_queue_size {#thread-pool-queue-size}
The maximum number of jobs that can be scheduled in the Global Thread pool. Increasing this parameter increases memory usage. It is recommended to keep this value equal to [max_thread_pool_size](#max-thread-pool-size).

Possible values:

- Positive integer.

Default value: `10000`.

**Example**
``` xml
<thread_pool_queue_size>12000</thread_pool_queue_size>
```
## merge_tree {#server_configuration_parameters-merge_tree}

Fine-tuning of tables in the [MergeTree](../../operations/server-configuration-parameters/settings.md) family.
@ -1011,7 +1047,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
If the table does not exist, ClickHouse creates it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed and a new table is created automatically.

**Example**
``` xml
<query_views_log>
@ -1075,9 +1111,8 @@ Parameters:
## query_masking_rules {#query-masking-rules}

Regex-based rules that are applied to all queries, as well as to all messages, before storing them in server logs, in the `system.query_log`, `system.text_log` and `system.processes` tables, and in logs sent to the client. This prevents leaking sensitive data from SQL queries (such as names, emails, personal identifiers or credit card numbers) into the logs.
**Example**
@ -1096,7 +1131,7 @@ Parameters:
- `regexp` - RE2-compatible regular expression (mandatory)
- `replace` - replacement string for sensitive data (optional; six asterisks by default)

Masking rules are applied to the whole query (to prevent leaking sensitive data from malformed / non-parsable queries).

The `system.events` table contains the `QueryMaskingRulesMatch` counter, which holds the total number of query masking rule matches.
@ -1418,7 +1453,7 @@ ClickHouse использует ZooKeeper для хранения метадан
You can also add the `memory` section, which means storing information only in memory, without writing to disk, and the `ldap` section, which means storing the information on an [LDAP server](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol).

To add an LDAP server as a remote user directory for users that are not defined locally, define a single `ldap` section with the following parameters:

- `server` — name of one of the LDAP servers defined in the `ldap_servers` section of the configuration file. This parameter is mandatory and cannot be empty.
- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect.

**Example**

View File

@ -391,12 +391,14 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}

Enables or disables parsing of enum values as ordinal numbers.

When enabled, incoming `ENUM` values in `TSV` data are always treated as ordinal numbers rather than as enumeration elements. It is recommended to enable this setting to speed up parsing if the `ENUM` data contains only ordinal numbers.

Possible values:

- 0 — incoming `ENUM` values are first matched against the enumeration elements; if no match is found, they are treated as ordinal numbers.
- 1 — incoming `ENUM` values are treated as ordinal numbers right away.

Default value: 0.
@ -410,10 +412,39 @@ CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first'
When the `input_format_tsv_enum_as_number` setting is enabled:

Query:
```sql
SET input_format_tsv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
SELECT * FROM table_with_enum_column_for_tsv_insert;
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
```
Query:
```sql
SET input_format_tsv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first';
```
throws an exception.

When the `input_format_tsv_enum_as_number` setting is disabled:

Query:
```sql
SET input_format_tsv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first';
SELECT * FROM table_with_enum_column_for_tsv_insert;
```
@ -428,15 +459,6 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
└─────┴────────┘
```
## input_format_null_as_default {#settings-input-format-null-as-default}

Enables or disables the initialization of [NULL](../../sql-reference/syntax.md#null-literal) cells with [default values](../../sql-reference/statements/create/table.md#create-default-values) if the column data type does not allow [storing NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
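A minimal sketch (the table and column names are hypothetical):

``` sql
-- n is UInt8 (not Nullable); with the setting enabled, the incoming NULL
-- is replaced by the column's default value instead of raising an error.
SET input_format_null_as_default = 1;
INSERT INTO t_defaults (n) VALUES (NULL);
```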
@ -1511,12 +1533,13 @@ SELECT area/period FROM account_orders FORMAT JSON;
## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}

Enables or disables parsing of enum values as ordinal numbers.

When enabled, incoming `ENUM` values in `CSV` data are always treated as ordinal numbers rather than as enumeration elements. It is recommended to enable this setting to speed up parsing if the `ENUM` data contains only ordinal numbers.

Possible values:

- 0 — incoming `ENUM` values are first matched against the enumeration elements; if no match is found, they are treated as ordinal numbers.
- 1 — incoming `ENUM` values are treated as ordinal numbers right away.

Default value: 0.
@ -1530,10 +1553,11 @@ CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first'
When the `input_format_csv_enum_as_number` setting is enabled:

Query:

```sql
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
SELECT * FROM table_with_enum_column_for_csv_insert;
```

Result:
@ -1544,15 +1568,37 @@ SELECT * FROM table_with_enum_column_for_csv_insert;
└─────┴────────┘
```
Query:

```sql
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first'
```

throws an exception.

When the `input_format_csv_enum_as_number` setting is disabled:

Query:
```sql
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first'
SELECT * FROM table_with_enum_column_for_csv_insert;
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
┌──Id─┬─Value─┐
│ 103 │ first │
└─────┴───────┘
```
## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}

Use CRLF (DOS/Windows style) instead of LF (Unix style) as the line separator for the CSV format.
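A minimal sketch:

``` sql
-- Rows in the CSV output now end with \r\n instead of \n.
SET output_format_csv_crlf_end_of_line = 1;
SELECT 1 AS a, 2 AS b FORMAT CSV;
```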

View File

@ -203,6 +203,9 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
- In `Pretty*` formats, the row is output as a separate table after the main result.
- In other formats it is not available.

!!! note "Note"
    totals is output only in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`.

When the [HAVING](having.md) clause is used, the behavior of `WITH TOTALS` is controlled by the `totals_mode` setting.

### Configuring Totals Processing {#configuring-totals-processing}

View File

@ -271,7 +271,7 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
## ORDER BY expr WITH FILL Modifier {#orderby-with-fill}

This modifier can also be combined with the [LIMIT ... WITH TIES](../../../sql-reference/statements/select/limit.md#limit-with-ties) modifier.

The `WITH FILL` modifier can be set after `ORDER BY expr` with the optional parameters `FROM expr`, `TO expr` and `STEP expr`.
All missing values in the `expr` column are filled with values corresponding to the presumed sequence of the column values; the other columns are filled with their default values.

View File

@ -1,16 +1,55 @@
# system.merge_tree_settings {#system-merge_tree_settings}

Contains information about settings for `MergeTree` tables.

Columns:

- `name` (String) — Setting name.
- `value` (String) — Setting value.
- `description` (String) — Setting description.
- `type` (String) — Setting type (implementation-specific string value).
- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.

**Example**
```sql
:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```
```text
Row 1:
──────
name: index_granularity
value: 8192
changed: 0
description: How many rows correspond to one primary key value.
type: SettingUInt64
Row 2:
──────
name: min_bytes_for_wide_part
value: 0
changed: 0
description: Minimal uncompressed size in bytes to create part in wide format instead of compact
type: SettingUInt64
Row 3:
──────
name: min_rows_for_wide_part
value: 0
changed: 0
description: Minimal number of rows to create part in wide format instead of compact
type: SettingUInt64
Row 4:
──────
name: merge_max_block_size
value: 8192
changed: 0
description: How many rows in blocks should be formed for merge operations.
type: SettingUInt64
4 rows in set. Elapsed: 0.001 sec.
```
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/merge_tree_settings) <!--hide-->

View File

@ -1,58 +1,128 @@
# system.tables {#system-tables}

Contains metadata of each table that the server knows about. [Detached](../../sql-reference/statements/detach.md) tables are not shown in `system.tables`.

[Temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.tables` only in the sessions where they were created. Their `database` field is shown as empty, and the `is_temporary` flag is on.

This table contains the following columns (the column type is shown in brackets):

- `database` ([String](../../sql-reference/data-types/string.md)) — name of the database the table is in.
- `name` ([String](../../sql-reference/data-types/string.md)) — table name.
- `engine` ([String](../../sql-reference/data-types/string.md)) — table engine name (without parameters).
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - flag indicating whether the table is temporary.
- `data_path` ([String](../../sql-reference/data-types/string.md)) - path to the table data in the file system.
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - path to the table metadata in the file system.
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - time of the latest modification of the table metadata.
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - database dependencies.
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - table dependencies ([materialized view](../../engines/table-engines/special/materializedview.md) tables based on the current table).
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - the SQL statement used to create the table.
- `engine_full` ([String](../../sql-reference/data-types/string.md)) - parameters of the table engine.
- `as_select` ([String](../../sql-reference/data-types/string.md)) - the `SELECT` statement of a view.
- `partition_key` ([String](../../sql-reference/data-types/string.md)) - the partition key expression specified in the table.
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - the sorting key expression specified in the table.
- `primary_key` ([String](../../sql-reference/data-types/string.md)) - the primary key expression specified in the table.
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - the sampling key expression specified in the table.
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - the storage policy:
    - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    - [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - total number of rows; `NULL` if the exact number of rows in the table cannot be determined quickly (including the underlying `Buffer` table).
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - total number of bytes; `NULL` if the exact number of bytes stored for the table cannot be determined quickly (does **not** include any underlying storage).
    - If the table stores data on disk, this is the actual disk space used (compressed).
    - If the table stores data in memory, this is the approximate number of bytes used in memory.
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - total number of rows inserted since server start (only for `Buffer` tables).
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - total number of bytes inserted since server start (only for `Buffer` tables).
- `comment` ([String](../../sql-reference/data-types/string.md)) - comment on the table.
- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag indicating whether the table itself stores data on disk or accesses another source.

The `system.tables` table is used in the implementation of `SHOW TABLES` queries.

**Example**
```sql
SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: base
name: t1
uuid: 81b1c20a-b7c6-4116-a2ce-7583fb6b6736
engine: MergeTree
is_temporary: 0
data_paths: ['/var/lib/clickhouse/store/81b/81b1c20a-b7c6-4116-a2ce-7583fb6b6736/']
metadata_path: /var/lib/clickhouse/store/461/461cf698-fd0b-406d-8c01-5d8fd5748a91/t1.sql
metadata_modification_time: 2021-01-25 19:14:32
dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE base.t1 (`n` UInt64) ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192
engine_full: MergeTree ORDER BY n SETTINGS index_granularity = 8192
as_select: SELECT database AS table_catalog
partition_key:
sorting_key: n
primary_key: n
sampling_key:
storage_policy: default
total_rows: 1
total_bytes: 99
lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
Row 2:
──────
database: default
name: 53r93yleapyears
uuid: 00000000-0000-0000-0000-000000000000
engine: MergeTree
is_temporary: 0
data_paths: ['/var/lib/clickhouse/data/default/53r93yleapyears/']
metadata_path: /var/lib/clickhouse/metadata/default/53r93yleapyears.sql
metadata_modification_time: 2020-09-23 09:05:36
dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE default.`53r93yleapyears` (`id` Int8, `febdays` Int8) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192
engine_full: MergeTree ORDER BY id SETTINGS index_granularity = 8192
as_select: SELECT name AS catalog_name
partition_key:
sorting_key: id
primary_key: id
sampling_key:
storage_policy: default
total_rows: 2
total_bytes: 155
lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
```
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/tables) <!--hide-->
View File
@ -727,7 +727,6 @@ void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & o
void LocalServer::addOptions(OptionsDescription & options_description)
{
    options_description.main_description->add_options()
        ("database,d", po::value<std::string>(), "database")
        ("table,N", po::value<std::string>(), "name of the initial table")
        /// If structure argument is omitted then initial query is not generated
View File
@ -152,6 +152,7 @@
This setting could be used to switch replication to another network interface
(the server may be connected to multiple networks via multiple addresses)
-->
<!--
<interserver_http_host>example.yandex.ru</interserver_http_host>
-->
@ -177,6 +178,7 @@
-->
<!-- <listen_host>::</listen_host> -->
<!-- Same for hosts without support for IPv6: -->
<!-- <listen_host>0.0.0.0</listen_host> -->
@ -293,6 +295,10 @@
<max_thread_pool_size>10000</max_thread_pool_size>

<!-- Number of workers to recycle connections in background (see also drain_timeout).
     If the pool is full, connection will be drained synchronously. -->
<!-- <max_threads_for_connection_collector>10</max_threads_for_connection_collector> -->

<!-- On memory constrained environments you may have to set this to value larger than 1.
-->
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
View File
@ -87,7 +87,7 @@ if [ -z "$NO_BUILD" ] ; then
# Build (only binary packages).
debuild --preserve-env -e PATH \
    -e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \
    -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS}
    -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS} ${DEB_ARCH_FLAG}
fi
if [ -n "$MAKE_RPM" ]; then
View File
@ -54,7 +54,7 @@ namespace
const Poco::SHA1Engine::Digest & digest = engine.digest();
Poco::SHA1Engine::Digest calculated_password_sha1(sha1_size);

for (size_t i = 0; i < sha1_size; i++)
for (size_t i = 0; i < sha1_size; ++i)
    calculated_password_sha1[i] = scrambled_password[i] ^ digest[i];

auto calculated_password_double_sha1 = Util::encodeSHA1(calculated_password_sha1);
View File
@ -448,7 +448,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
    vals = nullptr;
});

for (std::size_t i = 0; vals[i]; i++)
for (size_t i = 0; vals[i]; ++i)
{
    if (vals[i]->bv_val && vals[i]->bv_len > 0)
        result.emplace(vals[i]->bv_val, vals[i]->bv_len);
@ -473,7 +473,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
    referrals = nullptr;
});

for (std::size_t i = 0; referrals[i]; i++)
for (size_t i = 0; referrals[i]; ++i)
{
    LOG_WARNING(&Poco::Logger::get("LDAPClient"), "Received reference during LDAP search but not following it: {}", referrals[i]);
}
View File
@ -15,6 +15,7 @@ namespace ErrorCodes
extern const int READONLY;
extern const int QUERY_IS_PROHIBITED;
extern const int SETTING_CONSTRAINT_VIOLATION;
extern const int UNKNOWN_SETTING;
}
@ -200,7 +201,23 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
};

if (reaction == THROW_ON_VIOLATION)
{
    try
    {
        access_control->checkSettingNameIsAllowed(setting_name);
    }
    catch (Exception & e)
    {
        if (e.code() == ErrorCodes::UNKNOWN_SETTING)
        {
            if (const auto hints = current_settings.getHints(change.name); !hints.empty())
            {
                e.addMessage(fmt::format("Maybe you meant {}", toString(hints)));
            }
        }
        throw;
    }
}
else if (!access_control->isSettingNameAllowed(setting_name))
    return false;
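For context, the hint machinery used here follows a common pattern: when a name lookup fails, compare the unknown name against the set of valid names and suggest near matches. A minimal standalone sketch of that idea, assuming a plain edit-distance cutoff of 2 (ClickHouse's actual `IHints`/`getHints` machinery may differ in details):

```cpp
// Illustrative sketch, not the ClickHouse implementation: "Maybe you meant ..."
// hints boil down to picking known names within a small edit distance.
#include <algorithm>
#include <string>
#include <vector>

static size_t editDistance(const std::string & a, const std::string & b)
{
    /// Classic two-row Levenshtein dynamic programming.
    std::vector<size_t> prev(b.size() + 1), cur(b.size() + 1);
    for (size_t j = 0; j <= b.size(); ++j)
        prev[j] = j;
    for (size_t i = 1; i <= a.size(); ++i)
    {
        cur[0] = i;
        for (size_t j = 1; j <= b.size(); ++j)
            cur[j] = std::min({prev[j] + 1, cur[j - 1] + 1,
                               prev[j - 1] + (a[i - 1] != b[j - 1] ? 1 : 0)});
        std::swap(prev, cur);
    }
    return prev[b.size()];
}

/// Return candidates within edit distance 2 of the unknown name.
std::vector<std::string> getHints(const std::string & unknown,
                                  const std::vector<std::string> & known)
{
    std::vector<std::string> hints;
    for (const auto & name : known)
        if (editDistance(unknown, name) <= 2)
            hints.push_back(name);
    return hints;
}
```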
View File
@ -90,7 +90,7 @@ private:
    throw;
}

for (i = 0; i < old_size; i++)
for (i = 0; i < old_size; ++i)
{
    nested_func->merge(&new_state[i * nested_size_of_data],
        &old_state[i * nested_size_of_data],
View File
@ -54,6 +54,8 @@ public:
template <typename T, typename Data, typename Policy>
class AggregateFunctionBitmapL2 final : public IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>
{
private:
    static constexpr auto STATE_VERSION_1_MIN_REVISION = 54455;

public:
    AggregateFunctionBitmapL2(const DataTypePtr & type)
        : IAggregateFunctionDataHelper<Data, AggregateFunctionBitmapL2<T, Data, Policy>>({type}, {})
@ -105,9 +107,38 @@
    }
}

void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override { this->data(place).rbs.write(buf); }
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override { this->data(place).rbs.read(buf); }

bool isVersioned() const override { return true; }

size_t getDefaultVersion() const override { return 1; }

size_t getVersionFromRevision(size_t revision) const override
{
    if (revision >= STATE_VERSION_1_MIN_REVISION)
        return 1;
    else
        return 0;
}

void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
{
    if (!version)
        version = getDefaultVersion();

    if (*version >= 1)
        DB::writeBoolText(this->data(place).init, buf);

    this->data(place).rbs.write(buf);
}

void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version, Arena *) const override
{
    if (!version)
        version = getDefaultVersion();

    if (*version >= 1)
        DB::readBoolText(this->data(place).init, buf);

    this->data(place).rbs.read(buf);
}

void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
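The pattern above, versioned aggregate-function state, gates each format change behind a minimum server revision so that old and new servers can still exchange states. A minimal sketch of the idea with hypothetical names and a toy payload (not the actual `groupBitmap` state):

```cpp
// Sketch of revision-gated state versioning. MIN_REVISION_WITH_INIT_FLAG and
// State are hypothetical stand-ins for the real aggregate state.
#include <cstdint>
#include <istream>
#include <optional>
#include <ostream>

static constexpr size_t MIN_REVISION_WITH_INIT_FLAG = 54455;

struct State { bool init = false; uint64_t payload = 0; };

size_t versionFromRevision(size_t revision)
{
    return revision >= MIN_REVISION_WITH_INIT_FLAG ? 1 : 0;
}

void serialize(const State & s, std::ostream & out, std::optional<size_t> version)
{
    if (!version)
        version = 1;  /// no negotiated version: default to the newest format
    if (*version >= 1)
        out.write(reinterpret_cast<const char *>(&s.init), sizeof(s.init));
    out.write(reinterpret_cast<const char *>(&s.payload), sizeof(s.payload));
}

void deserialize(State & s, std::istream & in, std::optional<size_t> version)
{
    if (!version)
        version = 1;
    if (*version >= 1)
        in.read(reinterpret_cast<char *>(&s.init), sizeof(s.init));
    in.read(reinterpret_cast<char *>(&s.payload), sizeof(s.payload));
}
```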
View File
@ -271,7 +271,7 @@ public:
{
    lower_bound = std::min(lower_bound, other.lower_bound);
    upper_bound = std::max(upper_bound, other.upper_bound);

    for (size_t i = 0; i < other.size; i++)
    for (size_t i = 0; i < other.size; ++i)
        add(other.points[i].mean, other.points[i].weight, max_bins);
}
View File
@ -56,7 +56,7 @@ static bool ALWAYS_INLINE inline is_all_zeros(const UInt8 * flags, size_t size)
    i += 8;
}

for (; i < size; i++)
for (; i < size; ++i)
    if (flags[i])
        return false;
View File
@ -7,18 +7,20 @@
#include <DataTypes/DataTypeDateTime.h>

#define TOP_K_MAX_SIZE 0xFFFFFF
static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF;

namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -42,19 +44,22 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateT
template <bool is_weighted>
static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold, UInt64 load_factor, const Array & params)
static IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params)
{
    WhichDataType which(argument_type);
    if (argument_types.empty())
        throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Got empty arguments list");

    WhichDataType which(argument_types[0]);
    if (which.idx == TypeIndex::Date)
        return new AggregateFunctionTopKDate<is_weighted>(threshold, load_factor, {argument_type}, params);
        return new AggregateFunctionTopKDate<is_weighted>(threshold, load_factor, argument_types, params);
    if (which.idx == TypeIndex::DateTime)
        return new AggregateFunctionTopKDateTime<is_weighted>(threshold, load_factor, {argument_type}, params);
        return new AggregateFunctionTopKDateTime<is_weighted>(threshold, load_factor, argument_types, params);

    /// Check that we can use plain version of AggregateFunctionTopKGeneric
    if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
    if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, load_factor, argument_type, params);
        return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, load_factor, argument_types, params);
    else
        return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, load_factor, argument_type, params);
        return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, load_factor, argument_types, params);
}
@ -78,40 +83,37 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
if (!params.empty())
{
    if (params.size() > 2)
        throw Exception("Aggregate function " + name + " requires two parameters or less.",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Aggregate function '{}' requires two parameters or less", name);

    UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
    if (params.size() == 2)
    {
        load_factor = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
        if (load_factor < 1)
            throw Exception("Too small parameter 'load_factor' for aggregate function " + name + ". Minimum: 1",
                ErrorCodes::ARGUMENT_OUT_OF_BOUND);
            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
                "Too small parameter 'load_factor' for aggregate function '{}' (got {}, minimum is 1)", name, load_factor);
    }

    if (k > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || k * load_factor > TOP_K_MAX_SIZE)
        throw Exception("Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE),
            ErrorCodes::ARGUMENT_OUT_OF_BOUND);
    if (k == 0)
        throw Exception("Parameter 0 is illegal for aggregate function " + name,
            ErrorCodes::ARGUMENT_OUT_OF_BOUND);
    threshold = k;
    threshold = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

    if (threshold > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || threshold * load_factor > TOP_K_MAX_SIZE)
        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
            "Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE));

    if (threshold == 0)
        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name);
}

AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK, is_weighted>(
    *argument_types[0], threshold, load_factor, argument_types, params));

if (!res)
    res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types[0], threshold, load_factor, params));
    res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types, threshold, load_factor, params));

if (!res)
    throw Exception("Illegal type " + argument_types[0]->getName() +
        " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
        "Illegal type {} of argument for aggregate function '{}'", argument_types[0]->getName(), name);

return res;
}
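The rewritten validation enforces three bounds on the topK parameters: `load_factor >= 1`, each parameter at most `TOP_K_MAX_SIZE`, and their product at most `TOP_K_MAX_SIZE` (the reserved capacity is `threshold * load_factor`). A compact standalone sketch of the same checks, assuming plain standard-library exceptions instead of `DB::Exception`:

```cpp
// Standalone sketch of the topK parameter checks shown above.
#include <cstdint>
#include <stdexcept>
#include <string>

static constexpr uint64_t TOP_K_MAX_SIZE = 0xFFFFFF;

void validateTopKParams(uint64_t threshold, uint64_t load_factor)
{
    if (load_factor < 1)
        throw std::invalid_argument("load_factor must be at least 1, got " + std::to_string(load_factor));
    if (threshold == 0)
        throw std::invalid_argument("threshold must be positive");
    /// The per-parameter bounds run first, so the product cannot overflow here.
    if (threshold > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE
        || threshold * load_factor > TOP_K_MAX_SIZE)
        throw std::invalid_argument("parameters must not exceed " + std::to_string(TOP_K_MAX_SIZE));
}
```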
View File
@ -132,8 +132,8 @@ private:
public:
    AggregateFunctionTopKGeneric(
        UInt64 threshold_, UInt64 load_factor, const DataTypePtr & input_data_type_, const Array & params)
        UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
        : IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>({input_data_type_}, params)
        : IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>(argument_types_, params)
        , threshold(threshold_), reserved(load_factor * threshold), input_data_type(this->argument_types[0]) {}

    String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
View File
@ -2,6 +2,7 @@
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnSparse.h>
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <Core/Field.h>
@ -181,6 +182,13 @@
    Arena * arena,
    ssize_t if_argument_pos = -1) const = 0;
/// The version of "addBatch", that handle sparse columns as arguments.
virtual void addBatchSparse(
AggregateDataPtr * places,
size_t place_offset,
const IColumn ** columns,
Arena * arena) const = 0;
virtual void mergeBatch(
    size_t batch_size,
    AggregateDataPtr * places,
@ -193,6 +201,10 @@
virtual void addBatchSinglePlace(
    size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const = 0;
/// The version of "addBatchSinglePlace", that handle sparse columns as arguments.
virtual void addBatchSparseSinglePlace(
AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
/** The same for single place when need to aggregate only filtered data.
  * Instead of using an if-column, the condition is combined inside the null_map
  */
@ -367,6 +379,22 @@
    }
}
void addBatchSparse(
AggregateDataPtr * places,
size_t place_offset,
const IColumn ** columns,
Arena * arena) const override
{
const auto & column_sparse = assert_cast<const ColumnSparse &>(*columns[0]);
const auto * values = &column_sparse.getValuesColumn();
size_t batch_size = column_sparse.size();
auto offset_it = column_sparse.begin();
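        /// Every row is added: the offset iterator resolves default rows to value
        /// index 0 (the shared default stored at position 0 of 'values') and
        /// non-default rows to the index of their explicitly stored value.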
for (size_t i = 0; i < batch_size; ++i, ++offset_it)
static_cast<const Derived *>(this)->add(places[offset_it.getCurrentRow()] + place_offset,
&values, offset_it.getValueIndex(), arena);
}
void mergeBatch(
    size_t batch_size,
    AggregateDataPtr * places,
@ -398,6 +426,19 @@
    }
}
void addBatchSparseSinglePlace(
AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
{
/// TODO: add values and defaults separately if order of adding isn't important.
const auto & column_sparse = assert_cast<const ColumnSparse &>(*columns[0]);
const auto * values = &column_sparse.getValuesColumn();
size_t batch_size = column_sparse.size();
auto offset_it = column_sparse.begin();
for (size_t i = 0; i < batch_size; ++i, ++offset_it)
static_cast<const Derived *>(this)->add(place, &values, offset_it.getValueIndex(), arena);
}
void addBatchSinglePlaceNotNull(
    size_t batch_size,
    AggregateDataPtr place,
View File
@ -106,6 +106,10 @@ if (USE_AWS_S3)
    add_headers_and_sources(dbms Disks/S3)
endif()

if (USE_AZURE_BLOB_STORAGE)
    add_headers_and_sources(dbms Disks/AzureBlobStorage)
endif()

if (USE_HDFS)
    add_headers_and_sources(dbms Storages/HDFS)
    add_headers_and_sources(dbms Disks/HDFS)
@ -450,6 +454,11 @@
    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR})
endif()

if (USE_AZURE_BLOB_STORAGE)
    target_link_libraries (clickhouse_common_io PUBLIC ${AZURE_BLOB_STORAGE_LIBRARY})
    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AZURE_SDK_INCLUDES})
endif()

if (USE_S2_GEOMETRY)
    dbms_target_link_libraries (PUBLIC ${S2_GEOMETRY_LIBRARY})
    dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${S2_GEOMETRY_INCLUDE_DIR})
View File
@ -4,6 +4,8 @@
#include <iomanip>
#include <string_view>
#include <filesystem>
#include <map>
#include <unordered_map>

#include <base/argsToConfig.h>
#include <base/DateLUT.h>
@ -52,6 +54,7 @@
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Interpreters/ProfileEventsExt.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/CompressionMethod.h>
#include <Client/InternalTextLogs.h>
@ -105,6 +108,99 @@ namespace ProfileEvents
namespace DB
{
static void incrementProfileEventsBlock(Block & dst, const Block & src)
{
if (!dst)
{
dst = src;
return;
}
assertBlocksHaveEqualStructure(src, dst, "ProfileEvents");
std::unordered_map<String, size_t> name_pos;
for (size_t i = 0; i < dst.columns(); ++i)
name_pos[dst.getByPosition(i).name] = i;
size_t dst_rows = dst.rows();
MutableColumns mutable_columns = dst.mutateColumns();
auto & dst_column_host_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["host_name"]]);
auto & dst_array_current_time = typeid_cast<ColumnUInt32 &>(*mutable_columns[name_pos["current_time"]]).getData();
auto & dst_array_thread_id = typeid_cast<ColumnUInt64 &>(*mutable_columns[name_pos["thread_id"]]).getData();
auto & dst_array_type = typeid_cast<ColumnInt8 &>(*mutable_columns[name_pos["type"]]).getData();
auto & dst_column_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["name"]]);
auto & dst_array_value = typeid_cast<ColumnInt64 &>(*mutable_columns[name_pos["value"]]).getData();
const auto & src_column_host_name = typeid_cast<const ColumnString &>(*src.getByName("host_name").column);
const auto & src_array_current_time = typeid_cast<const ColumnUInt32 &>(*src.getByName("current_time").column).getData();
const auto & src_array_thread_id = typeid_cast<const ColumnUInt64 &>(*src.getByName("thread_id").column).getData();
const auto & src_column_name = typeid_cast<const ColumnString &>(*src.getByName("name").column);
const auto & src_array_value = typeid_cast<const ColumnInt64 &>(*src.getByName("value").column).getData();
struct Id
{
StringRef name;
StringRef host_name;
UInt64 thread_id;
bool operator<(const Id & rhs) const
{
return std::tie(name, host_name, thread_id)
< std::tie(rhs.name, rhs.host_name, rhs.thread_id);
}
};
std::map<Id, UInt64> rows_by_name;
for (size_t src_row = 0; src_row < src.rows(); ++src_row)
{
Id id{
src_column_name.getDataAt(src_row),
src_column_host_name.getDataAt(src_row),
src_array_thread_id[src_row],
};
rows_by_name[id] = src_row;
}
/// Merge src into dst.
for (size_t dst_row = 0; dst_row < dst_rows; ++dst_row)
{
Id id{
dst_column_name.getDataAt(dst_row),
dst_column_host_name.getDataAt(dst_row),
dst_array_thread_id[dst_row],
};
if (auto it = rows_by_name.find(id); it != rows_by_name.end())
{
size_t src_row = it->second;
dst_array_current_time[dst_row] = src_array_current_time[src_row];
switch (dst_array_type[dst_row])
{
case ProfileEvents::Type::INCREMENT:
dst_array_value[dst_row] += src_array_value[src_row];
break;
case ProfileEvents::Type::GAUGE:
dst_array_value[dst_row] = src_array_value[src_row];
break;
}
rows_by_name.erase(it);
}
}
/// Copy rows from src that dst does not contains.
for (const auto & [id, pos] : rows_by_name)
{
for (size_t col = 0; col < src.columns(); ++col)
{
mutable_columns[col]->insert((*src.getByPosition(col).column)[pos]);
}
}
dst.setColumns(std::move(mutable_columns));
}
std::atomic_flag exit_on_signal = ATOMIC_FLAG_INIT;
@ -753,7 +849,7 @@ void ClientBase::onProfileEvents(Block & block)
    }
    else
    {
        profile_events.last_block = block;
        incrementProfileEventsBlock(profile_events.last_block, block);
    }
}
profile_events.watch.restart();
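`incrementProfileEventsBlock` above merges a fresh ProfileEvents packet into the accumulated one, keyed by (name, host, thread): INCREMENT counters are added, GAUGE values are overwritten, and keys that appear only in the new packet are appended. The same merge logic on a simplified in-memory representation (hypothetical `Key`/`Event` types, no Block machinery):

```cpp
// Simplified model of the ProfileEvents merge: counters add up, gauges are
// replaced, unseen keys are appended.
#include <cstdint>
#include <map>
#include <string>
#include <tuple>

enum class Type { INCREMENT, GAUGE };

struct Key
{
    std::string name, host;
    uint64_t thread_id;
    bool operator<(const Key & rhs) const
    {
        return std::tie(name, host, thread_id) < std::tie(rhs.name, rhs.host, rhs.thread_id);
    }
};

struct Event { Type type; int64_t value; };

void merge(std::map<Key, Event> & dst, const std::map<Key, Event> & src)
{
    for (const auto & [key, event] : src)
    {
        auto [it, inserted] = dst.try_emplace(key, event);
        if (!inserted)
        {
            if (event.type == Type::INCREMENT)
                it->second.value += event.value;   /// counters accumulate
            else
                it->second.value = event.value;    /// gauges take the latest reading
        }
    }
}
```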
@ -1635,7 +1731,13 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description,
/// Check unrecognized options without positional options.
auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
if (!unrecognized_options.empty())
{
auto hints = this->getHints(unrecognized_options[0]);
if (!hints.empty())
throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", unrecognized_options[0], toString(hints));
    throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]);
}

/// Check positional options (options after ' -- ', ex: clickhouse-client -- <options>).
unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
@ -1713,6 +1815,25 @@
    ;

addOptions(options_description);
auto getter = [](const auto & op)
{
String op_long_name = op->long_name();
return "--" + String(op_long_name);
};
if (options_description.main_description)
{
const auto & main_options = options_description.main_description->options();
std::transform(main_options.begin(), main_options.end(), std::back_inserter(cmd_options), getter);
}
if (options_description.external_description)
{
const auto & external_options = options_description.external_description->options();
std::transform(external_options.begin(), external_options.end(), std::back_inserter(cmd_options), getter);
}
parseAndCheckOptions(options_description, options, common_arguments);
po::notify(options);
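The block above registers every `--long-option` name in `cmd_options` so that an unrecognized argument can later be matched against them for a suggestion. A small sketch of that registration step with `boost::program_options`, mirroring the `getter` lambda (names are illustrative):

```cpp
// Collecting "--<long_name>" strings from a boost::program_options
// description, as the getter lambda above does.
#include <boost/program_options.hpp>
#include <string>
#include <vector>

namespace po = boost::program_options;

std::vector<std::string> collectOptionNames(const po::options_description & desc)
{
    std::vector<std::string> names;
    for (const auto & opt : desc.options())
        names.push_back("--" + opt->long_name());
    return names;
}
```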
View File
@ -1,5 +1,6 @@
#pragma once

#include "Common/NamePrompter.h"
#include <Common/ProgressIndication.h>
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
@ -37,7 +38,7 @@ void interruptSignalHandler(int signum);
class InternalTextLogs;

class ClientBase : public Poco::Util::Application
class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase>
{
public:
@ -48,6 +49,8 @@ public:
    void init(int argc, char ** argv);

    std::vector<String> getAllRegisteredNames() const override { return cmd_options; }

protected:
    void runInteractive();
    void runNonInteractive();
@ -145,6 +148,7 @@ protected:
    std::vector<String> queries_files; /// If not empty, queries will be read from these files
    std::vector<String> interleave_queries_files; /// If not empty, run queries from these files before processing every file from 'queries_files'.
    std::vector<String> cmd_options;

    bool stdin_is_a_tty = false; /// stdin is a terminal.
    bool stdout_is_a_tty = false; /// stdout is a terminal.
View File
@ -25,7 +25,12 @@ struct PocoSocketWrapper : public Poco::Net::SocketImpl
void IConnections::DrainCallback::operator()(int fd, Poco::Timespan, const std::string fd_description) const
{
    if (!PocoSocketWrapper(fd).poll(drain_timeout, Poco::Net::Socket::SELECT_READ))
        throw Exception(ErrorCodes::SOCKET_TIMEOUT, "Read timeout while draining from {}", fd_description);
    {
        throw Exception(ErrorCodes::SOCKET_TIMEOUT,
            "Read timeout ({} ms) while draining from {}",
            drain_timeout.totalMilliseconds(),
            fd_description);
    }
}
}
View File
@ -395,17 +395,17 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
    read_list.push_back(*connection->socket);
}

auto timeout = is_draining ? drain_timeout : receive_timeout;
int n = Poco::Net::Socket::select(
    read_list,
    write_list,
    except_list,
    is_draining ? drain_timeout : receive_timeout);
    timeout);

/// We treat any error as timeout for simplicity.
/// And we also check if read_list is still empty just in case.
if (n <= 0 || read_list.empty())
{
    auto err_msg = fmt::format("Timeout exceeded while reading from {}", dumpAddressesUnlocked());
    for (ReplicaState & state : replica_states)
    {
        Connection * connection = state.connection;
@ -415,7 +415,10 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
        invalidateReplica(state);
    }
}
throw Exception(err_msg, ErrorCodes::TIMEOUT_EXCEEDED);
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
    "Timeout ({} ms) exceeded while reading from {}",
    timeout.totalMilliseconds(),
    dumpAddressesUnlocked());
}
}
View File
@ -133,6 +133,11 @@ public:
void get(size_t n, Field & res) const override;

bool isDefaultAt(size_t) const override
{
    throw Exception("Method isDefaultAt is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED);
}

StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
@ -208,6 +213,16 @@
    throw Exception("Method hasEqualValues is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED);
}

double getRatioOfDefaultRows(double) const override
{
    throw Exception("Method getRatioOfDefaultRows is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED);
}

void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
    throw Exception("Method getIndicesOfNonDefaultRows is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED);
}

void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
View File
@ -182,6 +182,13 @@ StringRef ColumnArray::getDataAt(size_t n) const
}
bool ColumnArray::isDefaultAt(size_t n) const
{
const auto & offsets_data = getOffsets();
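    /// Row n occupies the slice [offsets[n - 1], offsets[n]) of the nested data;
    /// it is default exactly when that slice is empty, i.e. the array is empty.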
return offsets_data[n] == offsets_data[static_cast<ssize_t>(n) - 1];
}
void ColumnArray::insertData(const char * pos, size_t length)
{
    /** Similarly - only for arrays of fixed length values.
@ -576,7 +583,8 @@ void ColumnArray::expand(const IColumn::Filter & mask, bool inverted)
}

if (from != -1)
    throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);}
    throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}

template <typename T>
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const
@ -868,6 +876,16 @@ ColumnPtr ColumnArray::compress() const
    });
}
double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnArray>(sample_ratio);
}
void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnArray>(indices, from, limit);
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
View File
@ -60,6 +60,7 @@ public:
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
StringRef getDataAt(size_t n) const override;
bool isDefaultAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
@ -143,6 +144,10 @@
    return false;
}

double getRatioOfDefaultRows(double sample_ratio) const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;

bool isCollationSupported() const override { return getData().isCollationSupported(); }
View File
@ -82,6 +82,7 @@ public:
Field operator[](size_t) const override { throwMustBeDecompressed(); }
void get(size_t, Field &) const override { throwMustBeDecompressed(); }
StringRef getDataAt(size_t) const override { throwMustBeDecompressed(); }
bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); }
void insert(const Field &) override { throwMustBeDecompressed(); }
void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
@ -113,6 +114,8 @@
void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); }
void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); }
size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); }
double getRatioOfDefaultRows(double) const override { throwMustBeDecompressed(); }
void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); }

protected:
    size_t rows;
View File
@ -5,6 +5,7 @@
#include <Columns/IColumn.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Common/PODArray.h>

namespace DB
@ -115,6 +116,11 @@
    return data->getFloat32(0);
}
bool isDefaultAt(size_t) const override
{
return data->isDefaultAt(0);
}
bool isNullAt(size_t) const override
{
    return data->isNullAt(0);
@ -239,6 +245,27 @@
    return false;
}
double getRatioOfDefaultRows(double) const override
{
return data->isDefaultAt(0) ? 1.0 : 0.0;
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
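        /// A constant column repeats one value, so either every row is
        /// non-default or none is, depending only on that single value.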
if (!data->isDefaultAt(0))
{
size_t to = limit && from + limit < size() ? from + limit : size();
indices.reserve(indices.size() + to - from);
for (size_t i = from; i < to; ++i)
indices.push_back(i);
}
}
SerializationInfoPtr getSerializationInfo() const override
{
return data->getSerializationInfo();
}
bool isNullable() const override { return isColumnNullable(*data); }
bool onlyNull() const override { return data->isNullAt(0); }
bool isNumeric() const override { return data->isNumeric(); }
View File
@ -331,7 +331,8 @@ void ColumnDecimal<T>::gather(ColumnGathererStream & gatherer)
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::compress() const
{
    size_t source_size = data.size() * sizeof(T);
    const size_t data_size = data.size();
    const size_t source_size = data_size * sizeof(T);

    /// Don't compress small blocks.
    if (source_size < 4096) /// A wild guess.
@ -342,8 +343,9 @@ ColumnPtr ColumnDecimal<T>::compress() const
    if (!compressed)
        return ColumnCompressed::wrap(this->getPtr());

    return ColumnCompressed::create(data.size(), compressed->size(),
        [compressed = std::move(compressed), column_size = data.size(), scale = this->scale]
    const size_t compressed_size = compressed->size();
    return ColumnCompressed::create(data_size, compressed_size,
        [compressed = std::move(compressed), column_size = data_size, scale = this->scale]
    {
        auto res = ColumnDecimal<T>::create(column_size, scale);
        ColumnCompressed::decompressBuffer(
View File
@ -177,8 +177,17 @@ public:
    return false;
}

ColumnPtr compress() const override;

double getRatioOfDefaultRows(double sample_ratio) const override
{
    return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}

void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
    return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}

ColumnPtr compress() const override;

void insertValue(const T value) { data.push_back(value); }
Container & getData() { return data; }
View File
@ -51,6 +51,12 @@ MutableColumnPtr ColumnFixedString::cloneResized(size_t size) const
    return new_col_holder;
}
bool ColumnFixedString::isDefaultAt(size_t index) const
{
assert(index < size());
return memoryIsZero(chars.data() + index * n, n);
}
void ColumnFixedString::insert(const Field & x)
{
    const String & s = DB::get<const String &>(x);
@ -409,9 +415,9 @@ ColumnPtr ColumnFixedString::compress() const
    if (!compressed)
        return ColumnCompressed::wrap(this->getPtr());

    size_t column_size = size();
    const size_t column_size = size();
    const size_t compressed_size = compressed->size();
    return ColumnCompressed::create(column_size, compressed->size(),
    return ColumnCompressed::create(column_size, compressed_size,
        [compressed = std::move(compressed), column_size, n = n]
    {
        size_t chars_size = n * column_size;
View File
@ -88,6 +88,8 @@ public:
    return StringRef(&chars[n * index], n);
}

bool isDefaultAt(size_t index) const override;

void insert(const Field & x) override;
void insertFrom(const IColumn & src_, size_t index) override;
@ -182,6 +184,16 @@
    return false;
}

double getRatioOfDefaultRows(double sample_ratio) const override
{
    return getRatioOfDefaultRowsImpl<ColumnFixedString>(sample_ratio);
}

void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
    return getIndicesOfNonDefaultRowsImpl<ColumnFixedString>(indices, from, limit);
}

bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; }
View File
@ -24,7 +24,12 @@ class ColumnFunction final : public COWHelper<IColumn, ColumnFunction>
private:
    friend class COWHelper<IColumn, ColumnFunction>;

    ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_ = false, bool is_function_compiled_ = false);
    ColumnFunction(
        size_t size,
        FunctionBasePtr function_,
        const ColumnsWithTypeAndName & columns_to_capture,
        bool is_short_circuit_argument_ = false,
        bool is_function_compiled_ = false);

public:
    const char * getFamilyName() const override { return "Function"; }
@ -68,6 +73,11 @@
    throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

bool isDefaultAt(size_t) const override
{
    throw Exception("isDefaultAt is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

void insert(const Field &) override
{
    throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED);
@ -153,6 +163,16 @@
    throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

double getRatioOfDefaultRows(double) const override
{
    throw Exception("Method getRatioOfDefaultRows is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override
{
    throw Exception("Method getIndicesOfNonDefaultRows is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

bool isShortCircuitArgument() const { return is_short_circuit_argument; }
DataTypePtr getResultType() const;
View File
@ -64,6 +64,7 @@ public:
    return getDictionary().getDataAtWithTerminatingZero(getIndexes().getUInt(n));
}

bool isDefaultAt(size_t n) const override { return getDictionary().isDefaultAt(getIndexes().getUInt(n)); }
UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); }
UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); }
Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); }
@ -180,6 +181,16 @@
    return false;
}

double getRatioOfDefaultRows(double sample_ratio) const override
{
    return getIndexes().getRatioOfDefaultRows(sample_ratio);
}

void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
    return getIndexes().getIndicesOfNonDefaultRows(indices, from, limit);
}

bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); }
bool isFixedAndContiguous() const override { return false; }
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
View File
@ -81,6 +81,11 @@ void ColumnMap::get(size_t n, Field & res) const
    getNestedData().get(offset + i, map[i]);
}

bool ColumnMap::isDefaultAt(size_t n) const
{
    return nested->isDefaultAt(n);
}

StringRef ColumnMap::getDataAt(size_t) const
{
    throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
@ -273,6 +278,16 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const
    return false;
}

double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const
{
    return getRatioOfDefaultRowsImpl<ColumnMap>(sample_ratio);
}

void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
    return getIndicesOfNonDefaultRowsImpl<ColumnMap>(indices, from, limit);
}

ColumnPtr ColumnMap::compress() const
{
    auto compressed = nested->compress();
View File
@ -51,6 +51,7 @@ public:
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
bool isDefaultAt(size_t n) const override;
StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
@ -85,6 +86,8 @@
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;

const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }
ColumnArray & getNestedColumn() { return assert_cast<ColumnArray &>(*nested); }
View File
@ -648,6 +648,29 @@ void ColumnNullable::checkConsistency() const
        ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT);
}
ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
{
ColumnPtr new_values;
ColumnPtr new_null_map;
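    /// A NULL default means the filler rows must be flagged in the null map;
    /// a non-NULL default keeps the null map clear and stores the value itself.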
if (default_field.getType() == Field::Types::Null)
{
auto default_column = nested_column->cloneEmpty();
default_column->insertDefault();
/// Value in main column, when null map is 1 is implementation defined. So, take any value.
new_values = nested_column->createWithOffsets(offsets, (*default_column)[0], total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, Field(1u), total_rows, shift);
}
else
{
new_values = nested_column->createWithOffsets(offsets, default_field, total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, Field(0u), total_rows, shift);
}
return ColumnNullable::create(new_values, new_null_map);
}
ColumnPtr makeNullable(const ColumnPtr & column)
{
    if (isColumnNullable(*column))
View File
@ -54,6 +54,7 @@ public:
void get(size_t n, Field & res) const override;
bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); }
UInt64 get64(size_t n) const override { return nested_column->get64(n); }
bool isDefaultAt(size_t n) const override { return isNullAt(n); }

/**
 * If isNullAt(n) returns false, returns the nested column's getDataAt(n), otherwise returns a special value
@ -137,6 +138,18 @@
    return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return null_map->getRatioOfDefaultRows(sample_ratio);
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
null_map->getIndicesOfNonDefaultRows(indices, from, limit);
}
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override;
bool isNullable() const override { return true; }
bool isFixedAndContiguous() const override { return false; }
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }
View File
@ -0,0 +1,779 @@
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnTuple.h>
#include <Common/WeakHash.h>
#include <Common/SipHash.h>
#include <Common/HashTable/Hash.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <algorithm>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
ColumnSparse::ColumnSparse(MutableColumnPtr && values_)
: values(std::move(values_)), _size(0)
{
if (!values->empty())
throw Exception("Not empty values passed to ColumnSparse, but no offsets passed", ErrorCodes::LOGICAL_ERROR);
values->insertDefault();
offsets = ColumnUInt64::create();
}
ColumnSparse::ColumnSparse(MutableColumnPtr && values_, MutableColumnPtr && offsets_, size_t size_)
: values(std::move(values_)), offsets(std::move(offsets_)), _size(size_)
{
const ColumnUInt64 * offsets_concrete = typeid_cast<const ColumnUInt64 *>(offsets.get());
if (!offsets_concrete)
throw Exception(ErrorCodes::LOGICAL_ERROR, "'offsets' column must be a ColumnUInt64, got: {}", offsets->getName());
/// 'values' should contain one extra element: default value at 0 position.
if (offsets->size() + 1 != values->size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Values size ({}) is inconsistent with offsets size ({})", values->size(), offsets->size());
if (_size < offsets->size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Size of sparse column ({}) cannot be lower than number of non-default values ({})", _size, offsets->size());
if (!offsets_concrete->empty() && _size <= offsets_concrete->getData().back())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Size of sparse column ({}) should be greater than last position of non-default value ({})",
_size, offsets_concrete->getData().back());
#ifndef NDEBUG
const auto & offsets_data = getOffsetsData();
const auto * it = std::adjacent_find(offsets_data.begin(), offsets_data.end(), std::greater_equal<UInt64>());
if (it != offsets_data.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Offsets of ColumnSparse must be strictly sorted");
#endif
}
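/// Example of the representation: the dense column [0, 5, 0, 0, 7] is stored as
///     values  = [default, 5, 7]  (one extra default value at position 0),
///     offsets = [1, 4]           (rows that hold non-default values),
///     _size   = 5.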
MutableColumnPtr ColumnSparse::cloneResized(size_t new_size) const
{
if (new_size == 0)
return ColumnSparse::create(values->cloneEmpty());
if (new_size >= _size)
return ColumnSparse::create(IColumn::mutate(values), IColumn::mutate(offsets), new_size);
auto res = ColumnSparse::create(values->cloneEmpty());
res->insertRangeFrom(*this, 0, new_size);
return res;
}
bool ColumnSparse::isDefaultAt(size_t n) const
{
return getValueIndex(n) == 0;
}
bool ColumnSparse::isNullAt(size_t n) const
{
return values->isNullAt(getValueIndex(n));
}
Field ColumnSparse::operator[](size_t n) const
{
return (*values)[getValueIndex(n)];
}
void ColumnSparse::get(size_t n, Field & res) const
{
values->get(getValueIndex(n), res);
}
bool ColumnSparse::getBool(size_t n) const
{
return values->getBool(getValueIndex(n));
}
Float64 ColumnSparse::getFloat64(size_t n) const
{
return values->getFloat64(getValueIndex(n));
}
Float32 ColumnSparse::getFloat32(size_t n) const
{
return values->getFloat32(getValueIndex(n));
}
UInt64 ColumnSparse::getUInt(size_t n) const
{
return values->getUInt(getValueIndex(n));
}
Int64 ColumnSparse::getInt(size_t n) const
{
return values->getInt(getValueIndex(n));
}
UInt64 ColumnSparse::get64(size_t n) const
{
return values->get64(getValueIndex(n));
}
StringRef ColumnSparse::getDataAt(size_t n) const
{
return values->getDataAt(getValueIndex(n));
}
ColumnPtr ColumnSparse::convertToFullColumnIfSparse() const
{
return values->createWithOffsets(getOffsetsData(), (*values)[0], _size, /*shift=*/ 1);
}
void ColumnSparse::insertSingleValue(const Inserter & inserter)
{
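    /// Append via the callback, then keep the new value only if it is non-default;
    /// default rows are represented implicitly by not having an offset.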
inserter(*values);
size_t last_idx = values->size() - 1;
if (values->isDefaultAt(last_idx))
values->popBack(1);
else
getOffsetsData().push_back(_size);
++_size;
}
void ColumnSparse::insertData(const char * pos, size_t length)
{
insertSingleValue([&](IColumn & column) { column.insertData(pos, length); });
}
StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return values->serializeValueIntoArena(getValueIndex(n), arena, begin);
}
const char * ColumnSparse::deserializeAndInsertFromArena(const char * pos)
{
const char * res = nullptr;
insertSingleValue([&](IColumn & column) { res = column.deserializeAndInsertFromArena(pos); });
return res;
}
const char * ColumnSparse::skipSerializedInArena(const char * pos) const
{
return values->skipSerializedInArena(pos);
}
void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
if (length == 0)
return;
if (start + length > src.size())
throw Exception("Parameter out of bound in IColumnString::insertRangeFrom method.",
ErrorCodes::LOGICAL_ERROR);
auto & offsets_data = getOffsetsData();
size_t end = start + length;
if (const auto * src_sparse = typeid_cast<const ColumnSparse *>(&src))
{
const auto & src_offsets = src_sparse->getOffsetsData();
const auto & src_values = src_sparse->getValuesColumn();
size_t offset_start = std::lower_bound(src_offsets.begin(), src_offsets.end(), start) - src_offsets.begin();
size_t offset_end = std::lower_bound(src_offsets.begin(), src_offsets.end(), end) - src_offsets.begin();
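        /// [offset_start, offset_end) bounds the source offsets that fall into
        /// the copied range [start, end).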
assert(offset_start <= offset_end);
if (offset_start != offset_end)
{
offsets_data.reserve(offsets_data.size() + offset_end - offset_start);
insertManyDefaults(src_offsets[offset_start] - start);
offsets_data.push_back(_size);
++_size;
for (size_t i = offset_start + 1; i < offset_end; ++i)
{
size_t current_diff = src_offsets[i] - src_offsets[i - 1];
insertManyDefaults(current_diff - 1);
offsets_data.push_back(_size);
++_size;
}
/// 'end' <= 'src_offsets[offsets_end]', but end is excluded, so index is 'offsets_end' - 1.
/// Since 'end' is excluded, need to subtract one more row from result.
insertManyDefaults(end - src_offsets[offset_end - 1] - 1);
values->insertRangeFrom(src_values, offset_start + 1, offset_end - offset_start);
}
else
{
insertManyDefaults(length);
}
}
else
{
for (size_t i = start; i < end; ++i)
{
if (!src.isDefaultAt(i))
{
values->insertFrom(src, i);
offsets_data.push_back(_size);
}
++_size;
}
}
}
void ColumnSparse::insert(const Field & x)
{
insertSingleValue([&](IColumn & column) { column.insert(x); });
}
void ColumnSparse::insertFrom(const IColumn & src, size_t n)
{
if (const auto * src_sparse = typeid_cast<const ColumnSparse *>(&src))
{
if (size_t value_index = src_sparse->getValueIndex(n))
{
getOffsetsData().push_back(_size);
values->insertFrom(src_sparse->getValuesColumn(), value_index);
}
}
else
{
if (!src.isDefaultAt(n))
{
values->insertFrom(src, n);
getOffsetsData().push_back(_size);
}
}
++_size;
}
void ColumnSparse::insertDefault()
{
++_size;
}
void ColumnSparse::insertManyDefaults(size_t length)
{
_size += length;
}
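/// Removes the last n rows. Offsets pointing at removed rows are popped, and
/// the same number of values is removed from 'values'.
/// Example: _size = 6, offsets = [1, 4]; popBack(3) gives new_size = 3, the
/// offset 4 (>= 3) is dropped together with one value, and offsets become [1].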
void ColumnSparse::popBack(size_t n)
{
assert(n < _size);
auto & offsets_data = getOffsetsData();
size_t new_size = _size - n;
size_t removed_values = 0;
while (!offsets_data.empty() && offsets_data.back() >= new_size)
{
offsets_data.pop_back();
++removed_values;
}
if (removed_values)
values->popBack(removed_values);
_size = new_size;
}
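/// The filter for 'values' always keeps index 0 (the shared default value), so
/// 'values_filter' starts as [1]. A non-default row contributes 1 or 0 to it
/// depending on 'filt[i]'; surviving default rows only advance the result size.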
ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const
{
if (_size != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
if (offsets->empty())
{
auto res = cloneEmpty();
res->insertManyDefaults(countBytesInFilter(filt));
return res;
}
auto res_offsets = offsets->cloneEmpty();
auto & res_offsets_data = assert_cast<ColumnUInt64 &>(*res_offsets).getData();
Filter values_filter;
values_filter.reserve(values->size());
values_filter.push_back(1);
size_t values_result_size_hint = 1;
size_t res_offset = 0;
auto offset_it = begin();
for (size_t i = 0; i < _size; ++i, ++offset_it)
{
if (!offset_it.isDefault())
{
if (filt[i])
{
res_offsets_data.push_back(res_offset);
values_filter.push_back(1);
++res_offset;
++values_result_size_hint;
}
else
{
values_filter.push_back(0);
}
}
else
{
res_offset += filt[i] != 0;
}
}
auto res_values = values->filter(values_filter, values_result_size_hint);
return this->create(std::move(res_values), std::move(res_offsets), res_offset);
}
void ColumnSparse::expand(const Filter & mask, bool inverted)
{
    if (mask.size() < _size)
        throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
    auto res_offsets = offsets->cloneEmpty();
    auto & res_offsets_data = assert_cast<ColumnUInt64 &>(*res_offsets).getData();
    auto it = begin();
    for (size_t i = 0; i < mask.size(); ++i)
    {
        if (!!mask[i] ^ inverted)
        {
            if (it.getCurrentRow() == _size)
                throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
            /// Non-default rows are visited in order, so their new positions can be appended.
            if (!it.isDefault())
                res_offsets_data.push_back(i);
            ++it;
        }
    }
    offsets = std::move(res_offsets);
    _size = mask.size();
}
ColumnPtr ColumnSparse::permute(const Permutation & perm, size_t limit) const
{
return permuteImpl(*this, perm, limit);
}
ColumnPtr ColumnSparse::index(const IColumn & indexes, size_t limit) const
{
return selectIndexImpl(*this, indexes, limit);
}
template <typename Type>
ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
assert(limit <= indexes.size());
if (limit == 0)
return ColumnSparse::create(values->cloneEmpty());
if (offsets->empty())
{
auto res = cloneEmpty();
res->insertManyDefaults(limit);
return res;
}
auto res_offsets = offsets->cloneEmpty();
auto & res_offsets_data = assert_cast<ColumnUInt64 &>(*res_offsets).getData();
auto res_values = values->cloneEmpty();
res_values->insertDefault();
/// If we need to permute the full column, or if the limit is large enough,
/// it's better to save the indexes of values in O(size)
/// and avoid a binary search for every index.
/// The factor of 3 is just a guess for the overhead of copying indexes.
bool execute_linear =
limit == _size || limit * std::bit_width(offsets->size()) > _size * 3;
if (execute_linear)
{
PaddedPODArray<UInt64> values_index(_size);
auto offset_it = begin();
for (size_t i = 0; i < _size; ++i, ++offset_it)
values_index[i] = offset_it.getValueIndex();
for (size_t i = 0; i < limit; ++i)
{
size_t index = values_index[indexes[i]];
if (index != 0)
{
res_values->insertFrom(*values, index);
res_offsets_data.push_back(i);
}
}
}
else
{
for (size_t i = 0; i < limit; ++i)
{
size_t index = getValueIndex(indexes[i]);
if (index != 0)
{
res_values->insertFrom(*values, index);
res_offsets_data.push_back(i);
}
}
}
return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit);
}
int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
{
if (const auto * rhs_sparse = typeid_cast<const ColumnSparse *>(&rhs_))
return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint);
return values->compareAt(getValueIndex(n), m, rhs_, null_direction_hint);
}
void ColumnSparse::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
if (row_indexes)
{
/// TODO: implement without conversion to full column.
auto this_full = convertToFullColumnIfSparse();
auto rhs_full = rhs.convertToFullColumnIfSparse();
this_full->compareColumn(*rhs_full, rhs_row_num, row_indexes, compare_results, direction, nan_direction_hint);
}
else
{
const auto & rhs_sparse = assert_cast<const ColumnSparse &>(rhs);
PaddedPODArray<Int8> nested_result;
values->compareColumn(rhs_sparse.getValuesColumn(), rhs_sparse.getValueIndex(rhs_row_num),
nullptr, nested_result, direction, nan_direction_hint);
const auto & offsets_data = getOffsetsData();
compare_results.resize_fill(_size, nested_result[0]);
for (size_t i = 0; i < offsets_data.size(); ++i)
compare_results[offsets_data[i]] = nested_result[i + 1];
}
}
int ColumnSparse::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator & collator) const
{
if (const auto * rhs_sparse = typeid_cast<const ColumnSparse *>(&rhs))
return values->compareAtWithCollation(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint, collator);
return values->compareAtWithCollation(getValueIndex(n), m, rhs, null_direction_hint, collator);
}
bool ColumnSparse::hasEqualValues() const
{
size_t num_defaults = getNumberOfDefaults();
if (num_defaults == _size)
return true;
/// There is at least 1 default and 1 non-default value.
if (num_defaults != 0)
return false;
/// Check that all non-default values are equal.
/// It's suboptimal, but it's a rare case.
for (size_t i = 2; i < values->size(); ++i)
if (values->compareAt(1, i, *values, 1) != 0)
return false;
return true;
}
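/// Sort the values column first; 'limit + 1' accounts for the artificial default
/// stored at index 0. Then map sorted value indexes back to rows: a permutation
/// entry equal to 0 stands for the default value, whose rows are taken from the
/// iterator; any other entry k corresponds to row offsets_data[k - 1].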
void ColumnSparse::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const
{
if (_size == 0)
return;
res.resize(_size);
if (offsets->empty())
{
for (size_t i = 0; i < _size; ++i)
res[i] = i;
return;
}
if (limit == 0 || limit > _size)
limit = _size;
Permutation perm;
/// First, we sort all values.
/// limit + 1 covers the case when there are 0 default values.
if (collator)
values->getPermutationWithCollation(*collator, reverse, limit + 1, null_direction_hint, perm);
else
values->getPermutation(reverse, limit + 1, null_direction_hint, perm);
size_t num_of_defaults = getNumberOfDefaults();
size_t row = 0;
const auto & offsets_data = getOffsetsData();
/// Fill the permutation.
for (size_t i = 0; i < perm.size() && row < limit; ++i)
{
if (perm[i] == 0)
{
if (!num_of_defaults)
continue;
/// Fill the required number of positions with default values.
auto offset_it = begin();
while (row < limit)
{
while (offset_it.getCurrentRow() < _size && !offset_it.isDefault())
++offset_it;
if (offset_it.getCurrentRow() == _size)
break;
res[row++] = offset_it.getCurrentRow();
++offset_it;
}
}
else
{
res[row++] = offsets_data[perm[i] - 1];
}
}
assert(row == limit);
}
void ColumnSparse::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
return getPermutationImpl(reverse, limit, null_direction_hint, res, nullptr);
}
void ColumnSparse::updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
auto this_full = convertToFullColumnIfSparse();
this_full->updatePermutation(reverse, limit, null_direction_hint, res, equal_range);
}
void ColumnSparse::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
return getPermutationImpl(reverse, limit, null_direction_hint, res, &collator);
}
void ColumnSparse::updatePermutationWithCollation(
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const
{
auto this_full = convertToFullColumnIfSparse();
this_full->updatePermutationWithCollation(collator, reverse, limit, null_direction_hint, res, equal_range);
}
size_t ColumnSparse::byteSize() const
{
return values->byteSize() + offsets->byteSize() + sizeof(_size);
}
size_t ColumnSparse::byteSizeAt(size_t n) const
{
size_t index = getValueIndex(n);
size_t res = values->byteSizeAt(index);
if (index)
res += sizeof(UInt64);
return res;
}
size_t ColumnSparse::allocatedBytes() const
{
return values->allocatedBytes() + offsets->allocatedBytes() + sizeof(_size);
}
void ColumnSparse::protect()
{
values->protect();
offsets->protect();
}
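/// Each non-default row i is repeated 'replicate_offsets[i] - replicate_offsets[i - 1]' times.
/// Note: reading 'replicate_offsets[i - 1]' at i == 0 appears to rely on the left padding
/// of PaddedPODArray (reading the -1-th element of offsets yields 0), the same idiom
/// that other offset-based columns use.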
ColumnPtr ColumnSparse::replicate(const Offsets & replicate_offsets) const
{
/// TODO: implement specializations.
if (_size != replicate_offsets.size())
throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
if (_size == 0)
return ColumnSparse::create(values->cloneEmpty());
auto res_offsets = offsets->cloneEmpty();
auto & res_offsets_data = assert_cast<ColumnUInt64 &>(*res_offsets).getData();
auto res_values = values->cloneEmpty();
res_values->insertDefault();
auto offset_it = begin();
for (size_t i = 0; i < _size; ++i, ++offset_it)
{
if (!offset_it.isDefault())
{
size_t replicate_size = replicate_offsets[i] - replicate_offsets[i - 1];
res_offsets_data.reserve(res_offsets_data.size() + replicate_size);
for (size_t row = replicate_offsets[i - 1]; row < replicate_offsets[i]; ++row)
{
res_offsets_data.push_back(row);
res_values->insertFrom(*values, offset_it.getValueIndex());
}
}
}
return ColumnSparse::create(std::move(res_values), std::move(res_offsets), replicate_offsets.back());
}
void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
{
values->updateHashWithValue(getValueIndex(n), hash);
}
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const
{
if (hash.getData().size() != _size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
"column size is {}, hash size is {}", _size, hash.getData().size());
auto offset_it = begin();
auto & hash_data = hash.getData();
for (size_t i = 0; i < _size; ++i, ++offset_it)
{
size_t value_index = offset_it.getValueIndex();
auto data_ref = values->getDataAt(value_index);
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
}
}
void ColumnSparse::updateHashFast(SipHash & hash) const
{
values->updateHashFast(hash);
offsets->updateHashFast(hash);
hash.update(_size);
}
void ColumnSparse::getExtremes(Field & min, Field & max) const
{
if (_size == 0)
{
values->get(0, min);
values->get(0, max);
return;
}
if (getNumberOfDefaults() == 0)
{
size_t min_idx = 1;
size_t max_idx = 1;
for (size_t i = 2; i < values->size(); ++i)
{
if (values->compareAt(i, min_idx, *values, 1) < 0)
min_idx = i;
else if (values->compareAt(i, max_idx, *values, 1) > 0)
max_idx = i;
}
values->get(min_idx, min);
values->get(max_idx, max);
return;
}
values->getExtremes(min, max);
}
void ColumnSparse::getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const
{
const auto & offsets_data = getOffsetsData();
const auto * start = from ? std::lower_bound(offsets_data.begin(), offsets_data.end(), from) : offsets_data.begin();
const auto * end = limit ? std::lower_bound(offsets_data.begin(), offsets_data.end(), from + limit) : offsets_data.end();
indices.insert(start, end);
}
double ColumnSparse::getRatioOfDefaultRows(double) const
{
return static_cast<double>(getNumberOfDefaults()) / _size;
}
MutableColumns ColumnSparse::scatter(ColumnIndex num_columns, const Selector & selector) const
{
return scatterImpl<ColumnSparse>(num_columns, selector);
}
void ColumnSparse::gather(ColumnGathererStream & gatherer_stream)
{
gatherer_stream.gather(*this);
}
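/// 'values' and 'offsets' are compressed independently; the returned column holds
/// only the lambda below, which decompresses both parts and rebuilds the sparse
/// column on demand.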
ColumnPtr ColumnSparse::compress() const
{
auto values_compressed = values->compress();
auto offsets_compressed = offsets->compress();
size_t byte_size = values_compressed->byteSize() + offsets_compressed->byteSize();
return ColumnCompressed::create(size(), byte_size,
[values_compressed = std::move(values_compressed), offsets_compressed = std::move(offsets_compressed), size = size()]
{
return ColumnSparse::create(values_compressed->decompress(), offsets_compressed->decompress(), size);
});
}
bool ColumnSparse::structureEquals(const IColumn & rhs) const
{
if (const auto * rhs_sparse = typeid_cast<const ColumnSparse *>(&rhs))
return values->structureEquals(*rhs_sparse->values);
return false;
}
void ColumnSparse::forEachSubcolumn(ColumnCallback callback)
{
callback(values);
callback(offsets);
}
const IColumn::Offsets & ColumnSparse::getOffsetsData() const
{
return assert_cast<const ColumnUInt64 &>(*offsets).getData();
}
IColumn::Offsets & ColumnSparse::getOffsetsData()
{
return assert_cast<ColumnUInt64 &>(*offsets).getData();
}
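/// Example: offsets = [1, 3]. getValueIndex(3) finds offsets[1] == 3 and
/// returns 1 + 1 = 2 (values[2] holds the value of row 3), while
/// getValueIndex(2) finds offsets[1] == 3 != 2 and returns 0 (the shared default).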
size_t ColumnSparse::getValueIndex(size_t n) const
{
assert(n < _size);
const auto & offsets_data = getOffsetsData();
const auto * it = std::lower_bound(offsets_data.begin(), offsets_data.end(), n);
if (it == offsets_data.end() || *it != n)
return 0;
return it - offsets_data.begin() + 1;
}
ColumnPtr recursiveRemoveSparse(const ColumnPtr & column)
{
if (!column)
return column;
if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
{
auto columns = column_tuple->getColumns();
for (auto & element : columns)
element = recursiveRemoveSparse(element);
return ColumnTuple::create(columns);
}
return column->convertToFullColumnIfSparse();
}
}
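For readers who want to experiment with the encoding outside the ClickHouse tree, here is a minimal, self-contained sketch of the values/offsets layout described above. It is an editorial illustration, not part of this commit: ToySparse and its members are made-up names, and plain std::vector stands in for the IColumn machinery.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

/// Toy stand-in for the sparse layout: values[0] is the shared default,
/// offsets[i] is the row of values[i + 1] in the full column.
struct ToySparse
{
    std::vector<int64_t> values{0};
    std::vector<uint64_t> offsets;
    size_t size = 0;

    /// Mirrors ColumnSparse::getValueIndex: binary search over sorted offsets.
    size_t valueIndex(size_t n) const
    {
        auto it = std::lower_bound(offsets.begin(), offsets.end(), n);
        if (it == offsets.end() || *it != n)
            return 0;
        return static_cast<size_t>(it - offsets.begin()) + 1;
    }

    int64_t at(size_t n) const { return values[valueIndex(n)]; }
};

int main()
{
    ToySparse col;
    col.values = {0, 42, 7};   /// default, then the two non-default values
    col.offsets = {1, 3};      /// rows of 42 and 7
    col.size = 5;              /// full column: [0, 42, 0, 7, 0]

    assert(col.at(0) == 0 && col.at(1) == 42 && col.at(2) == 0);
    assert(col.at(3) == 7 && col.at(4) == 0);
    return 0;
}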

src/Columns/ColumnSparse.h Normal file
View File

@ -0,0 +1,231 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
class Collator;
namespace DB
{
/** Column for sparse representation.
 *  It stores a column with non-default values and a column
 *  with their sorted positions in the original column. The column
 *  with values also contains one default value at position 0 to make
 *  execution of functions and sorting more convenient.
 */
class ColumnSparse final : public COWHelper<IColumn, ColumnSparse>
{
private:
friend class COWHelper<IColumn, ColumnSparse>;
explicit ColumnSparse(MutableColumnPtr && values_);
ColumnSparse(MutableColumnPtr && values_, MutableColumnPtr && offsets_, size_t size_);
ColumnSparse(const ColumnSparse &) = default;
public:
static constexpr auto DEFAULT_ROWS_SEARCH_SAMPLE_RATIO = 0.1;
static constexpr auto DEFAULT_RATIO_FOR_SPARSE_SERIALIZATION = 0.95;
using Base = COWHelper<IColumn, ColumnSparse>;
static Ptr create(const ColumnPtr & values_, const ColumnPtr & offsets_, size_t size_)
{
return Base::create(values_->assumeMutable(), offsets_->assumeMutable(), size_);
}
template <typename TColumnPtr, typename = typename std::enable_if<IsMutableColumns<TColumnPtr>::value>::type>
static MutablePtr create(TColumnPtr && values_, TColumnPtr && offsets_, size_t size_)
{
return Base::create(std::move(values_), std::move(offsets_), size_);
}
static Ptr create(const ColumnPtr & values_)
{
return Base::create(values_->assumeMutable());
}
template <typename TColumnPtr, typename = typename std::enable_if<IsMutableColumns<TColumnPtr>::value>::type>
static MutablePtr create(TColumnPtr && values_)
{
return Base::create(std::forward<TColumnPtr>(values_));
}
bool isSparse() const override { return true; }
const char * getFamilyName() const override { return "Sparse"; }
std::string getName() const override { return "Sparse(" + values->getName() + ")"; }
TypeIndex getDataType() const override { return values->getDataType(); }
MutableColumnPtr cloneResized(size_t new_size) const override;
size_t size() const override { return _size; }
bool isDefaultAt(size_t n) const override;
bool isNullAt(size_t n) const override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
bool getBool(size_t n) const override;
Float64 getFloat64(size_t n) const override;
Float32 getFloat32(size_t n) const override;
UInt64 getUInt(size_t n) const override;
Int64 getInt(size_t n) const override;
UInt64 get64(size_t n) const override;
StringRef getDataAt(size_t n) const override;
ColumnPtr convertToFullColumnIfSparse() const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insert(const Field & x) override;
void insertFrom(const IColumn & src, size_t n) override;
void insertDefault() override;
void insertManyDefaults(size_t length) override;
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type>
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator & collator) const override;
bool hasEqualValues() const override;
void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const;
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override;
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isNullable() const override { return values->isNullable(); }
bool isFixedAndContiguous() const override { return false; }
bool valuesHaveFixedSize() const override { return values->valuesHaveFixedSize(); }
size_t sizeOfValueIfFixed() const override { return values->sizeOfValueIfFixed() + sizeof(UInt64); }
bool isCollationSupported() const override { return values->isCollationSupported(); }
size_t getNumberOfDefaults() const { return _size - offsets->size(); }
size_t getNumberOfTrailingDefaults() const
{
return offsets->empty() ? _size : _size - getOffsetsData().back() - 1;
}
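/// Example: _size = 6, offsets = [1, 3] -> 6 - 3 - 1 = 2 trailing default rows (rows 4 and 5).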
/// Return position of element in the 'values' column
/// that corresponds to the n-th element of the full column.
/// O(log(offsets.size())) complexity.
size_t getValueIndex(size_t n) const;
const IColumn & getValuesColumn() const { return *values; }
IColumn & getValuesColumn() { return *values; }
const ColumnPtr & getValuesPtr() const { return values; }
ColumnPtr & getValuesPtr() { return values; }
const IColumn::Offsets & getOffsetsData() const;
IColumn::Offsets & getOffsetsData();
const ColumnPtr & getOffsetsPtr() const { return offsets; }
ColumnPtr & getOffsetsPtr() { return offsets; }
const IColumn & getOffsetsColumn() const { return *offsets; }
IColumn & getOffsetsColumn() { return *offsets; }
/// This class helps to iterate over all values in ColumnSparse.
class Iterator
{
public:
Iterator(const PaddedPODArray<UInt64> & offsets_, size_t size_, size_t current_offset_, size_t current_row_)
: offsets(offsets_), size(size_), current_offset(current_offset_), current_row(current_row_)
{
}
bool ALWAYS_INLINE isDefault() const { return current_offset == offsets.size() || current_row != offsets[current_offset]; }
size_t ALWAYS_INLINE getValueIndex() const { return isDefault() ? 0 : current_offset + 1; }
size_t ALWAYS_INLINE getCurrentRow() const { return current_row; }
size_t ALWAYS_INLINE getCurrentOffset() const { return current_offset; }
bool operator==(const Iterator & other) const
{
return size == other.size
&& current_offset == other.current_offset
&& current_row == other.current_row;
}
bool operator!=(const Iterator & other) const { return !(*this == other); }
Iterator operator++()
{
if (!isDefault())
++current_offset;
++current_row;
return *this;
}
private:
const PaddedPODArray<UInt64> & offsets;
const size_t size;
size_t current_offset;
size_t current_row;
};
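/// Example: offsets = [1, 3], _size = 5. Advancing from begin() visits rows 0..4;
/// isDefault() is false only at rows 1 and 3, where getValueIndex() returns 1 and 2.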
Iterator begin() const { return Iterator(getOffsetsData(), _size, 0, 0); }
Iterator end() const { return Iterator(getOffsetsData(), _size, getOffsetsData().size(), _size); }
private:
using Inserter = std::function<void(IColumn &)>;
/// Inserts a value into the 'values' column via callback.
/// Properly handles the case when the inserted value is default.
/// Used when it's unknown in advance whether the inserted value is default.
void insertSingleValue(const Inserter & inserter);
/// Contains the default value at position 0.
/// It's convenient, because it allows executing, e.g., functions or sorting
/// for this column without handling special cases.
WrappedPtr values;
/// Sorted offsets of non-default values in the full column.
/// 'offsets[i]' corresponds to 'values[i + 1]'.
WrappedPtr offsets;
size_t _size;
};
ColumnPtr recursiveRemoveSparse(const ColumnPtr & column);
}

src/Columns/ColumnString.cpp
View File

@ -474,8 +474,9 @@ void ColumnString::getExtremes(Field & min, Field & max) const
 ColumnPtr ColumnString::compress() const
 {
-    size_t source_chars_size = chars.size();
-    size_t source_offsets_size = offsets.size() * sizeof(Offset);
+    const size_t source_chars_size = chars.size();
+    const size_t source_offsets_elements = offsets.size();
+    const size_t source_offsets_size = source_offsets_elements * sizeof(Offset);

     /// Don't compress small blocks.
     if (source_chars_size < 4096) /// A wild guess.
@ -489,12 +490,14 @@ ColumnPtr ColumnString::compress() const
     auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, true);

-    return ColumnCompressed::create(offsets.size(), chars_compressed->size() + offsets_compressed->size(),
+    const size_t chars_compressed_size = chars_compressed->size();
+    const size_t offsets_compressed_size = offsets_compressed->size();
+    return ColumnCompressed::create(source_offsets_elements, chars_compressed_size + offsets_compressed_size,
         [
             chars_compressed = std::move(chars_compressed),
             offsets_compressed = std::move(offsets_compressed),
             source_chars_size,
-            source_offsets_elements = offsets.size()
+            source_offsets_elements
         ]
         {
             auto res = ColumnString::create();

src/Columns/ColumnString.h
View File

@ -107,6 +107,12 @@ public:
         return StringRef(&chars[offsetAt(n)], sizeAt(n));
     }

+    bool isDefaultAt(size_t n) const override
+    {
+        assert(n < size());
+        return sizeAt(n) == 1;
+    }
+
     /// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
 #if !defined(__clang__)
 #pragma GCC diagnostic push
@ -278,6 +284,16 @@ public:
         return typeid(rhs) == typeid(ColumnString);
     }

+    double getRatioOfDefaultRows(double sample_ratio) const override
+    {
+        return getRatioOfDefaultRowsImpl<ColumnString>(sample_ratio);
+    }
+
+    void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
+    {
+        return getIndicesOfNonDefaultRowsImpl<ColumnString>(indices, from, limit);
+    }
+
     Chars & getChars() { return chars; }
     const Chars & getChars() const { return chars; }

src/Columns/ColumnTuple.cpp
View File

@ -12,6 +12,7 @@
 #include <base/sort.h>
 #include <base/map.h>
 #include <base/range.h>
+#include <DataTypes/Serializations/SerializationInfoTuple.h>

 namespace DB
@ -113,6 +114,15 @@ void ColumnTuple::get(size_t n, Field & res) const
     res = tuple;
 }

+bool ColumnTuple::isDefaultAt(size_t n) const
+{
+    const size_t tuple_size = columns.size();
+    for (size_t i = 0; i < tuple_size; ++i)
+        if (!columns[i]->isDefaultAt(n))
+            return false;
+    return true;
+}
+
 StringRef ColumnTuple::getDataAt(size_t) const
 {
     throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
@ -536,4 +546,25 @@ ColumnPtr ColumnTuple::compress() const
     });
 }

+double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const
+{
+    return getRatioOfDefaultRowsImpl<ColumnTuple>(sample_ratio);
+}
+
+void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
+{
+    return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
+}
+
+SerializationInfoPtr ColumnTuple::getSerializationInfo() const
+{
+    MutableSerializationInfos infos;
+    infos.reserve(columns.size());
+
+    for (const auto & column : columns)
+        infos.push_back(const_pointer_cast<SerializationInfo>(column->getSerializationInfo()));
+
+    return std::make_shared<SerializationInfoTuple>(std::move(infos), SerializationInfo::Settings{});
+}
+
 }

src/Columns/ColumnTuple.h
View File

@ -53,6 +53,7 @@ public:
     Field operator[](size_t n) const override;
     void get(size_t n, Field & res) const override;
+    bool isDefaultAt(size_t n) const override;
     StringRef getDataAt(size_t n) const override;
     void insertData(const char * pos, size_t length) override;
     void insert(const Field & x) override;
@ -93,6 +94,9 @@ public:
     bool structureEquals(const IColumn & rhs) const override;
     bool isCollationSupported() const override;
     ColumnPtr compress() const override;
+    double getRatioOfDefaultRows(double sample_ratio) const override;
+    void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
+    SerializationInfoPtr getSerializationInfo() const override;

     size_t tupleSize() const { return columns.size(); }

Some files were not shown because too many files have changed in this diff.