Merge branch 'master' into zookeeper_client_fault_injection

2024-11-21 23:21:59 +00:00 · 2022-09-05 15:03:26 +03:00 · 2022-09-05 15:03:26 +03:00 · abffd4595e
commit abffd4595e
parent 2044470895 582216a3ca
367 changed files with 7199 additions and 2577 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -1,6 +1,14 @@
+# To run clang-tidy from CMake, build ClickHouse with -DENABLE_CLANG_TIDY=1. To show all warnings, it is
+# recommended to pass "-k0" to Ninja.
+
 # Enable all checks + disable selected checks. Feel free to remove disabled checks from below list if
 # a) the new check is not controversial (this includes many checks in readability-* and google-*) or
 # b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
+
+# TODO Let clang-tidy check headers in further directories
+#      --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
+HeaderFilterRegex: '^.*/(base)/.*(h|hpp)$'
+
 Checks: '*,
    -abseil-*,

--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@ -437,7 +437,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (address)
+          CHECK_NAME=Stateless tests (asan)
          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
          KILL_TIMEOUT=10800
          EOF
@ -521,7 +521,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_thread
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (thread)
+          CHECK_NAME=Stress test (tsan)
          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
          EOF
      - name: Download json reports
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@ -1287,7 +1287,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (address)
+          CHECK_NAME=Stateless tests (asan)
          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -1326,7 +1326,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (address)
+          CHECK_NAME=Stateless tests (asan)
          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -1365,7 +1365,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -1404,7 +1404,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -1443,7 +1443,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=2
@ -1519,7 +1519,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -1558,7 +1558,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -1597,7 +1597,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=2
@ -1830,7 +1830,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (address)
+          CHECK_NAME=Stateful tests (asan)
          REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -1867,7 +1867,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (thread)
+          CHECK_NAME=Stateful tests (tsan)
          REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -1904,7 +1904,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_msan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (memory)
+          CHECK_NAME=Stateful tests (msan)
          REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -2018,7 +2018,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_thread
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (address)
+          CHECK_NAME=Stress test (asan)
          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
          EOF
      - name: Download json reports
@ -2058,7 +2058,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_thread
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (thread)
+          CHECK_NAME=Stress test (tsan)
          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
          EOF
      - name: Download json reports
@ -2094,7 +2094,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (memory)
+          CHECK_NAME=Stress test (msan)
          REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
          EOF
      - name: Download json reports
@ -2130,7 +2130,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_undefined
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (undefined)
+          CHECK_NAME=Stress test (ubsan)
          REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
          EOF
      - name: Download json reports
@ -2319,7 +2319,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=0
          RUN_BY_HASH_TOTAL=4
@ -2357,7 +2357,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=1
          RUN_BY_HASH_TOTAL=4
@ -2395,7 +2395,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=2
          RUN_BY_HASH_TOTAL=4
@ -2433,7 +2433,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=3
          RUN_BY_HASH_TOTAL=4
@ -2550,7 +2550,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (ASan)
+          CHECK_NAME=AST fuzzer (asan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
          EOF
      - name: Download json reports
@ -2586,7 +2586,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (TSan)
+          CHECK_NAME=AST fuzzer (tsan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
          EOF
      - name: Download json reports
@ -2622,7 +2622,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (UBSan)
+          CHECK_NAME=AST fuzzer (ubsan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
          EOF
      - name: Download json reports
@ -2658,7 +2658,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (MSan)
+          CHECK_NAME=AST fuzzer (msan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
          EOF
      - name: Download json reports
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@ -1300,7 +1300,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (address)
+          CHECK_NAME=Stateless tests (asan)
          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -1339,7 +1339,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (address)
+          CHECK_NAME=Stateless tests (asan)
          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -1378,7 +1378,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -1417,7 +1417,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -1456,7 +1456,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=2
@ -1532,7 +1532,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -1571,7 +1571,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -1610,7 +1610,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=2
@ -1766,7 +1766,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_flaky_asan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests flaky check (address)
+          CHECK_NAME=Stateless tests flaky check (asan)
          REPO_COPY=${{runner.temp}}/stateless_flaky_asan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -1927,7 +1927,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (address)
+          CHECK_NAME=Stateful tests (asan)
          REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -1964,7 +1964,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (thread)
+          CHECK_NAME=Stateful tests (tsan)
          REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -2001,7 +2001,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_msan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (memory)
+          CHECK_NAME=Stateful tests (msan)
          REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -2115,7 +2115,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_thread
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (address)
+          CHECK_NAME=Stress test (asan)
          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
          EOF
      - name: Download json reports
@ -2155,7 +2155,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_thread
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (thread)
+          CHECK_NAME=Stress test (tsan)
          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
          EOF
      - name: Download json reports
@ -2191,7 +2191,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (memory)
+          CHECK_NAME=Stress test (msan)
          REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
          EOF
      - name: Download json reports
@ -2227,7 +2227,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_undefined
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (undefined)
+          CHECK_NAME=Stress test (ubsan)
          REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
          EOF
      - name: Download json reports
@ -2302,7 +2302,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (ASan)
+          CHECK_NAME=AST fuzzer (asan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
          EOF
      - name: Download json reports
@ -2338,7 +2338,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (TSan)
+          CHECK_NAME=AST fuzzer (tsan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
          EOF
      - name: Download json reports
@ -2374,7 +2374,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (UBSan)
+          CHECK_NAME=AST fuzzer (ubsan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
          EOF
      - name: Download json reports
@ -2410,7 +2410,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=AST fuzzer (MSan)
+          CHECK_NAME=AST fuzzer (msan)
          REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
          EOF
      - name: Download json reports
@ -2599,7 +2599,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=0
          RUN_BY_HASH_TOTAL=4
@ -2637,7 +2637,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=1
          RUN_BY_HASH_TOTAL=4
@ -2675,7 +2675,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=2
          RUN_BY_HASH_TOTAL=4
@ -2713,7 +2713,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=3
          RUN_BY_HASH_TOTAL=4
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -1,4 +1,4 @@
-name: ReleaseWorkflow
+name: PublishedReleaseCI
 # - Gets artifacts from S3
 # - Sends it to JFROG Artifactory
 # - Adds them to the release assets
@ -15,7 +15,7 @@ jobs:
    - name: Set envs
      run: |
        cat >> "$GITHUB_ENV" << 'EOF'
-        JFROG_API_KEY=${{ secrets.JFROG_KEY_API_PACKAGES }}
+        JFROG_API_KEY=${{ secrets.JFROG_ARTIFACTORY_API_KEY }}
        TEMP_PATH=${{runner.temp}}/release_packages
        REPO_COPY=${{runner.temp}}/release_packages/ClickHouse
        EOF
@ -30,7 +30,7 @@ jobs:
        cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
        cd "$REPO_COPY"
        python3 ./tests/ci/push_to_artifactory.py --release "${{ github.ref }}" \
-          --commit '${{ github.sha }}' --all
+          --commit '${{ github.sha }}' --artifactory-url "${{ secrets.JFROG_ARTIFACTORY_URL }}" --all
    - name: Upload packages to release assets
      uses: svenstaro/upload-release-action@v2
      with:
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@ -1,4 +1,4 @@
-name: ReleaseCI
+name: ReleaseBranchCI

 env:
  # Force the stdout and stderr streams to be unbuffered
@ -591,7 +591,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (address)
+          CHECK_NAME=Stateless tests (asan)
          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -630,7 +630,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (address)
+          CHECK_NAME=Stateless tests (asan)
          REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -669,7 +669,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -708,7 +708,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -747,7 +747,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (thread)
+          CHECK_NAME=Stateless tests (tsan)
          REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=2
@ -823,7 +823,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=0
@ -862,7 +862,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=1
@ -901,7 +901,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateless_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateless tests (memory)
+          CHECK_NAME=Stateless tests (msan)
          REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
          KILL_TIMEOUT=10800
          RUN_BY_HASH_NUM=2
@ -1134,7 +1134,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (address)
+          CHECK_NAME=Stateful tests (asan)
          REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -1171,7 +1171,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (thread)
+          CHECK_NAME=Stateful tests (tsan)
          REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -1208,7 +1208,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_msan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stateful tests (memory)
+          CHECK_NAME=Stateful tests (msan)
          REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
@ -1322,7 +1322,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_thread
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (address)
+          CHECK_NAME=Stress test (asan)
          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
          EOF
      - name: Download json reports
@ -1362,7 +1362,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_thread
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (thread)
+          CHECK_NAME=Stress test (tsan)
          REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
          EOF
      - name: Download json reports
@ -1398,7 +1398,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_memory
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (memory)
+          CHECK_NAME=Stress test (msan)
          REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
          EOF
      - name: Download json reports
@ -1434,7 +1434,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stress_undefined
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Stress test (undefined)
+          CHECK_NAME=Stress test (ubsan)
          REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
          EOF
      - name: Download json reports
@ -1623,7 +1623,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=0
          RUN_BY_HASH_TOTAL=4
@ -1661,7 +1661,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=1
          RUN_BY_HASH_TOTAL=4
@ -1699,7 +1699,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=2
          RUN_BY_HASH_TOTAL=4
@ -1737,7 +1737,7 @@ jobs:
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/integration_tests_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=Integration tests (thread)
+          CHECK_NAME=Integration tests (tsan)
          REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
          RUN_BY_HASH_NUM=3
          RUN_BY_HASH_TOTAL=4
--- a/base/base/Decimal.h
+++ b/base/base/Decimal.h
@ -52,15 +52,15 @@ struct Decimal
    constexpr Decimal(Decimal<T> &&) noexcept = default;
    constexpr Decimal(const Decimal<T> &) = default;

-    constexpr Decimal(const T & value_): value(value_) {}
+    constexpr Decimal(const T & value_): value(value_) {} // NOLINT(google-explicit-constructor)

    template <typename U>
-    constexpr Decimal(const Decimal<U> & x): value(x.value) {}
+    constexpr Decimal(const Decimal<U> & x): value(x.value) {} // NOLINT(google-explicit-constructor)

    constexpr Decimal<T> & operator=(Decimal<T> &&) noexcept = default;
    constexpr Decimal<T> & operator = (const Decimal<T> &) = default;

-    constexpr operator T () const { return value; }
+    constexpr operator T () const { return value; } // NOLINT(google-explicit-constructor)

    template <typename U>
    constexpr U convertTo() const
@ -111,7 +111,7 @@ public:
    using Base::Base;
    using NativeType = Base::NativeType;

-    constexpr DateTime64(const Base & v): Base(v) {}
+    constexpr DateTime64(const Base & v): Base(v) {} // NOLINT(google-explicit-constructor)
 };
 }

--- a/base/base/DecomposedFloat.h
+++ b/base/base/DecomposedFloat.h
@ -36,14 +36,14 @@ struct DecomposedFloat
 {
    using Traits = FloatTraits<T>;

-    DecomposedFloat(T x)
+    explicit DecomposedFloat(T x)
    {
        memcpy(&x_uint, &x, sizeof(x));
    }

    typename Traits::UInt x_uint;

-    bool is_negative() const
+    bool isNegative() const
    {
        return x_uint >> (Traits::bits - 1);
    }
@ -53,7 +53,7 @@ struct DecomposedFloat
    {
        return (exponent() == 0 && mantissa() == 0)
            ? 0
-            : (is_negative()
+            : (isNegative()
                ? -1
                : 1);
    }
@ -63,7 +63,7 @@ struct DecomposedFloat
        return (x_uint >> (Traits::mantissa_bits)) & (((1ull << (Traits::exponent_bits + 1)) - 1) >> 1);
    }

-    int16_t normalized_exponent() const
+    int16_t normalizedExponent() const
    {
        return int16_t(exponent()) - ((1ull << (Traits::exponent_bits - 1)) - 1);
    }
@ -73,20 +73,20 @@ struct DecomposedFloat
        return x_uint & ((1ull << Traits::mantissa_bits) - 1);
    }

-    int64_t mantissa_with_sign() const
+    int64_t mantissaWithSign() const
    {
-        return is_negative() ? -mantissa() : mantissa();
+        return isNegative() ? -mantissa() : mantissa();
    }

    /// NOTE Probably floating point instructions can be better.
-    bool is_integer_in_representable_range() const
+    bool isIntegerInRepresentableRange() const
    {
        return x_uint == 0
-            || (normalized_exponent() >= 0  /// The number is not less than one
+            || (normalizedExponent() >= 0  /// The number is not less than one
                /// The number is inside the range where every integer has exact representation in float
-                && normalized_exponent() <= static_cast<int16_t>(Traits::mantissa_bits)
+                && normalizedExponent() <= static_cast<int16_t>(Traits::mantissa_bits)
                /// After multiplying by 2^exp, the fractional part becomes zero, means the number is integer
-                && ((mantissa() & ((1ULL << (Traits::mantissa_bits - normalized_exponent())) - 1)) == 0));
+                && ((mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) == 0));
    }


@ -102,15 +102,15 @@ struct DecomposedFloat
            return sign();

        /// Different signs
-        if (is_negative() && rhs > 0)
+        if (isNegative() && rhs > 0)
            return -1;
-        if (!is_negative() && rhs < 0)
+        if (!isNegative() && rhs < 0)
            return 1;

        /// Fractional number with magnitude less than one
-        if (normalized_exponent() < 0)
+        if (normalizedExponent() < 0)
        {
-            if (!is_negative())
+            if (!isNegative())
                return rhs > 0 ? -1 : 1;
            else
                return rhs >= 0 ? -1 : 1;
@ -121,11 +121,11 @@ struct DecomposedFloat
        {
            if (rhs == std::numeric_limits<Int>::lowest())
            {
-                assert(is_negative());
+                assert(isNegative());

-                if (normalized_exponent() < static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
+                if (normalizedExponent() < static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
                    return 1;
-                if (normalized_exponent() > static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
+                if (normalizedExponent() > static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
                    return -1;

                if (mantissa() == 0)
@ -136,44 +136,44 @@ struct DecomposedFloat
        }

        /// Too large number: abs(float) > abs(rhs). Also the case with infinities and NaN.
-        if (normalized_exponent() >= static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
-            return is_negative() ? -1 : 1;
+        if (normalizedExponent() >= static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
+            return isNegative() ? -1 : 1;

        using UInt = std::conditional_t<(sizeof(Int) > sizeof(typename Traits::UInt)), make_unsigned_t<Int>, typename Traits::UInt>;
        UInt uint_rhs = rhs < 0 ? -rhs : rhs;

        /// Smaller octave: abs(rhs) < abs(float)
        /// FYI, TIL: octave is also called "binade", https://en.wikipedia.org/wiki/Binade
-        if (uint_rhs < (static_cast<UInt>(1) << normalized_exponent()))
-            return is_negative() ? -1 : 1;
+        if (uint_rhs < (static_cast<UInt>(1) << normalizedExponent()))
+            return isNegative() ? -1 : 1;

        /// Larger octave: abs(rhs) > abs(float)
-        if (normalized_exponent() + 1 < static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>)
-            && uint_rhs >= (static_cast<UInt>(1) << (normalized_exponent() + 1)))
-            return is_negative() ? 1 : -1;
+        if (normalizedExponent() + 1 < static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>)
+            && uint_rhs >= (static_cast<UInt>(1) << (normalizedExponent() + 1)))
+            return isNegative() ? 1 : -1;

        /// The same octave
-        /// uint_rhs == 2 ^ normalized_exponent + mantissa * 2 ^ (normalized_exponent - mantissa_bits)
+        /// uint_rhs == 2 ^ normalizedExponent + mantissa * 2 ^ (normalizedExponent - mantissa_bits)

-        bool large_and_always_integer = normalized_exponent() >= static_cast<int16_t>(Traits::mantissa_bits);
+        bool large_and_always_integer = normalizedExponent() >= static_cast<int16_t>(Traits::mantissa_bits);

        UInt a = large_and_always_integer
-            ? static_cast<UInt>(mantissa()) << (normalized_exponent() - Traits::mantissa_bits)
-            : static_cast<UInt>(mantissa()) >> (Traits::mantissa_bits - normalized_exponent());
+            ? static_cast<UInt>(mantissa()) << (normalizedExponent() - Traits::mantissa_bits)
+            : static_cast<UInt>(mantissa()) >> (Traits::mantissa_bits - normalizedExponent());

-        UInt b = uint_rhs - (static_cast<UInt>(1) << normalized_exponent());
+        UInt b = uint_rhs - (static_cast<UInt>(1) << normalizedExponent());

        if (a < b)
-            return is_negative() ? 1 : -1;
+            return isNegative() ? 1 : -1;
        if (a > b)
-            return is_negative() ? -1 : 1;
+            return isNegative() ? -1 : 1;

        /// Float has no fractional part means that the numbers are equal.
-        if (large_and_always_integer || (mantissa() & ((1ULL << (Traits::mantissa_bits - normalized_exponent())) - 1)) == 0)
+        if (large_and_always_integer || (mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) == 0)
            return 0;
        else
            /// Float has fractional part means its abs value is larger.
-            return is_negative() ? -1 : 1;
+            return isNegative() ? -1 : 1;
    }


--- a/base/base/JSON.h
+++ b/base/base/JSON.h
@ -38,6 +38,7 @@
  */


+// NOLINTBEGIN(google-explicit-constructor)
 #ifdef __clang__
 #  pragma clang diagnostic push
 #  pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
@ -46,6 +47,7 @@ POCO_DECLARE_EXCEPTION(Foundation_API, JSONException, Poco::Exception)
 #ifdef __clang__
 #  pragma clang diagnostic pop
 #endif
+// NOLINTEND(google-explicit-constructor)

 class JSON
 {
@ -61,7 +63,7 @@ public:
        checkInit();
    }

-    JSON(const std::string & s) : ptr_begin(s.data()), ptr_end(s.data() + s.size()), level(0)
+    explicit JSON(std::string_view s) : ptr_begin(s.data()), ptr_end(s.data() + s.size()), level(0)
    {
        checkInit();
    }
@ -71,13 +73,7 @@ public:
        *this = rhs;
    }

-    JSON & operator=(const JSON & rhs)
-    {
-        ptr_begin = rhs.ptr_begin;
-        ptr_end = rhs.ptr_end;
-        level = rhs.level;
-        return *this;
-    }
+    JSON & operator=(const JSON & rhs) = default;

    const char * data() const { return ptr_begin; }
    const char * dataEnd() const { return ptr_end; }
@ -169,7 +165,7 @@ public:

    /// Перейти к следующему элементу массива или следующей name-value паре объекта.
    iterator & operator++();
-    iterator operator++(int);
+    iterator operator++(int); // NOLINT(cert-dcl21-cpp)

    /// Есть ли в строке escape-последовательности
    bool hasEscapes() const;
--- a/base/base/arithmeticOverflow.h
+++ b/base/base/arithmeticOverflow.h
@ -3,6 +3,7 @@
 #include <base/extended_types.h>
 #include <base/defines.h>

+// NOLINTBEGIN(google-runtime-int)

 namespace common
 {
@ -206,3 +207,5 @@ namespace common
        return false;
    }
 }
+
+// NOLINTEND(google-runtime-int)
--- a/base/base/bit_cast.h
+++ b/base/base/bit_cast.h
@ -1,6 +1,6 @@
 #pragma once

-#include <string.h>
+#include <cstring>
 #include <algorithm>
 #include <type_traits>

--- a/base/base/defines.h
+++ b/base/base/defines.h
@ -143,8 +143,8 @@

 /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function)
 /// Consider adding a comment before using these macros.
-#   define TSA_SUPPRESS_WARNING_FOR_READ(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()
-#   define TSA_SUPPRESS_WARNING_FOR_WRITE(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()
+#   define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }())
+#   define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }())

 /// This macro is useful when only one thread writes to a member
 /// and you want to read this member from the same thread without locking a mutex.
--- a/base/base/extended_types.h
+++ b/base/base/extended_types.h
@ -5,7 +5,6 @@
 #include <base/types.h>
 #include <base/wide_integer.h>

-
 using Int128 = wide::integer<128, signed>;
 using UInt128 = wide::integer<128, unsigned>;
 using Int256 = wide::integer<256, signed>;
@ -18,7 +17,7 @@ static_assert(sizeof(UInt256) == 32);
 /// (std::common_type), are "set in stone". Attempting to specialize them causes undefined behavior.
 /// So instead of using the std type_traits, we use our own version which allows extension.
 template <typename T>
-struct is_signed
+struct is_signed // NOLINT(readability-identifier-naming)
 {
    static constexpr bool value = std::is_signed_v<T>;
 };
@ -30,7 +29,7 @@ template <typename T>
 inline constexpr bool is_signed_v = is_signed<T>::value;

 template <typename T>
-struct is_unsigned
+struct is_unsigned // NOLINT(readability-identifier-naming)
 {
    static constexpr bool value = std::is_unsigned_v<T>;
 };
@ -51,7 +50,7 @@ template <class T> concept is_integer =
 template <class T> concept is_floating_point = std::is_floating_point_v<T>;

 template <typename T>
-struct is_arithmetic
+struct is_arithmetic // NOLINT(readability-identifier-naming)
 {
    static constexpr bool value = std::is_arithmetic_v<T>;
 };
@ -66,9 +65,9 @@ template <typename T>
 inline constexpr bool is_arithmetic_v = is_arithmetic<T>::value;

 template <typename T>
-struct make_unsigned
+struct make_unsigned // NOLINT(readability-identifier-naming)
 {
-    typedef std::make_unsigned_t<T> type;
+    using type = std::make_unsigned_t<T>;
 };

 template <> struct make_unsigned<Int128> { using type = UInt128; };
@ -79,9 +78,9 @@ template <> struct make_unsigned<UInt256> { using type = UInt256; };
 template <typename T> using make_unsigned_t = typename make_unsigned<T>::type;

 template <typename T>
-struct make_signed
+struct make_signed // NOLINT(readability-identifier-naming)
 {
-    typedef std::make_signed_t<T> type;
+    using type = std::make_signed_t<T>;
 };

 template <> struct make_signed<Int128>  { using type = Int128; };
@ -92,7 +91,7 @@ template <> struct make_signed<UInt256> { using type = Int256; };
 template <typename T> using make_signed_t = typename make_signed<T>::type;

 template <typename T>
-struct is_big_int
+struct is_big_int // NOLINT(readability-identifier-naming)
 {
    static constexpr bool value = false;
 };
@ -104,4 +103,3 @@ template <> struct is_big_int<UInt256> { static constexpr bool value = true; };

 template <typename T>
 inline constexpr bool is_big_int_v = is_big_int<T>::value;
-
--- a/base/base/find_symbols.h
+++ b/base/base/find_symbols.h
@ -15,7 +15,7 @@
  *
  * Allow to search for next character from the set of 'symbols...' in a string.
  * It is similar to 'strpbrk', 'strcspn' (and 'strchr', 'memchr' in the case of one symbol and '\0'),
-  * but with the following differencies:
+  * but with the following differences:
  * - works with any memory ranges, including containing zero bytes;
  * - doesn't require terminating zero byte: end of memory range is passed explicitly;
  * - if not found, returns pointer to end instead of nullptr;
--- a/base/base/iostream_debug_helpers.h
+++ b/base/base/iostream_debug_helpers.h
@ -120,6 +120,7 @@ Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t<decltype(dumpImpl<p
    return dumpImpl<priority>(out, x);
 }

+// NOLINTNEXTLINE(google-explicit-constructor)
 struct LowPriority { LowPriority(void *) {} };

 template <int priority, typename Out, typename T>
--- a/base/base/itoa.h
+++ b/base/base/itoa.h
@ -91,10 +91,10 @@ template <size_t N>
 using DivisionBy10PowN = typename SelectType
 <
    N,
-    Division<uint8_t, 0, 205U, 11>,                           /// divide by 10
-    Division<uint16_t, 1, 41943U, 22>,                        /// divide by 100
-    Division<uint32_t, 0, 3518437209U, 45>,                   /// divide by 10000
-    Division<uint64_t, 0, 12379400392853802749ULL, 90>        /// divide by 100000000
+    Division<uint8_t, false, 205U, 11>,                           /// divide by 10
+    Division<uint16_t, true, 41943U, 22>,                         /// divide by 100
+    Division<uint32_t, false, 3518437209U, 45>,                   /// divide by 10000
+    Division<uint64_t, false, 12379400392853802749ULL, 90>        /// divide by 100000000
 >::Result;

 template <size_t N>
@ -352,7 +352,7 @@ static inline char * writeUIntText(T x, char * p)
    static_assert(is_unsigned_v<T>);

    int len = digits10(x);
-    auto pp = p + len;
+    auto * pp = p + len;
    while (x >= 100)
    {
        const auto i = x % 100;
--- a/base/base/scope_guard.h
+++ b/base/base/scope_guard.h
@ -5,13 +5,13 @@
 #include <utility>

 template <class F>
-class [[nodiscard]] basic_scope_guard
+class [[nodiscard]] BasicScopeGuard
 {
 public:
-    constexpr basic_scope_guard() = default;
-    constexpr basic_scope_guard(basic_scope_guard && src) : function{src.release()} {}
+    constexpr BasicScopeGuard() = default;
+    constexpr BasicScopeGuard(BasicScopeGuard && src) : function{src.release()} {} // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor)

-    constexpr basic_scope_guard & operator=(basic_scope_guard && src)
+    constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor)
    {
        if (this != &src)
        {
@ -23,11 +23,11 @@ public:

    template <typename G>
    requires std::is_convertible_v<G, F>
-    constexpr basic_scope_guard(basic_scope_guard<G> && src) : function{src.release()} {}
+    constexpr BasicScopeGuard(BasicScopeGuard<G> && src) : function{src.release()} {} // NOLINT(google-explicit-constructor)

    template <typename G>
    requires std::is_convertible_v<G, F>
-    constexpr basic_scope_guard & operator=(basic_scope_guard<G> && src)
+    constexpr BasicScopeGuard & operator=(BasicScopeGuard<G> && src)
    {
        if (this != &src)
        {
@ -39,13 +39,13 @@ public:

    template <typename G>
    requires std::is_convertible_v<G, F>
-    constexpr basic_scope_guard(const G & function_) : function{function_} {}
+    constexpr BasicScopeGuard(const G & function_) : function{function_} {} // NOLINT(google-explicit-constructor)

    template <typename G>
    requires std::is_convertible_v<G, F>
-    constexpr basic_scope_guard(G && function_) : function{std::move(function_)} {}
+    constexpr BasicScopeGuard(G && function_) : function{std::move(function_)} {} // NOLINT(google-explicit-constructor, bugprone-forwarding-reference-overload, bugprone-move-forwarding-reference)

-    ~basic_scope_guard() { invoke(); }
+    ~BasicScopeGuard() { invoke(); }

    static constexpr bool is_nullable = std::is_constructible_v<bool, F>;

@ -70,7 +70,7 @@ public:

    template <typename G>
    requires std::is_convertible_v<G, F>
-    basic_scope_guard<F> & join(basic_scope_guard<G> && other)
+    BasicScopeGuard<F> & join(BasicScopeGuard<G> && other)
    {
        if (other.function)
        {
@ -102,14 +102,13 @@ private:
    F function = F{};
 };

-using scope_guard = basic_scope_guard<std::function<void(void)>>;
+using scope_guard = BasicScopeGuard<std::function<void(void)>>;


 template <class F>
-inline basic_scope_guard<F> make_scope_guard(F && function_) { return std::forward<F>(function_); }
+inline BasicScopeGuard<F> make_scope_guard(F && function_) { return std::forward<F>(function_); }

 #define SCOPE_EXIT_CONCAT(n, ...) \
 const auto scope_exit##n = make_scope_guard([&] { __VA_ARGS__; })
 #define SCOPE_EXIT_FWD(n, ...) SCOPE_EXIT_CONCAT(n, __VA_ARGS__)
 #define SCOPE_EXIT(...) SCOPE_EXIT_FWD(__LINE__, __VA_ARGS__)
-
--- a/base/base/sort.h
+++ b/base/base/sort.h
@ -14,7 +14,7 @@ template <typename Comparator>
 class DebugLessComparator
 {
 public:
-    constexpr DebugLessComparator(Comparator & cmp_)
+    constexpr DebugLessComparator(Comparator & cmp_) // NOLINT(google-explicit-constructor)
        : cmp(cmp_)
    {}

--- a/base/base/strong_typedef.h
+++ b/base/base/strong_typedef.h
@ -34,8 +34,10 @@ public:
    template <class Enable = typename std::is_move_assignable<T>::type>
    Self & operator=(T && rhs) { t = std::move(rhs); return *this;}

+    // NOLINTBEGIN(google-explicit-constructor)
    operator const T & () const { return t; }
    operator T & () { return t; }
+    // NOLINTEND(google-explicit-constructor)

    bool operator==(const Self & rhs) const { return t == rhs.t; }
    bool operator<(const Self & rhs) const { return t < rhs.t; }
@ -58,7 +60,10 @@ namespace std
    };
 }

+// NOLINTBEGIN(bugprone-macro-parentheses)
+
 #define STRONG_TYPEDEF(T, D) \
    struct D ## Tag {}; \
    using D = StrongTypedef<T, D ## Tag>; \

+// NOLINTEND(bugprone-macro-parentheses)
--- a/base/base/unit.h
+++ b/base/base/unit.h
@ -10,9 +10,11 @@ constexpr size_t GiB = 1024 * MiB;
 #  pragma clang diagnostic ignored "-Wreserved-identifier"
 #endif

+// NOLINTBEGIN(google-runtime-int)
 constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; }
 constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; }
 constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; }
+// NOLINTEND(google-runtime-int)

 #ifdef HAS_RESERVED_IDENTIFIER
 #  pragma clang diagnostic pop
--- a/base/base/wide_integer_to_string.h
+++ b/base/base/wide_integer_to_string.h
@ -51,8 +51,8 @@ struct fmt::formatter<wide::integer<Bits, Signed>>
 {
    constexpr auto parse(format_parse_context & ctx)
    {
-        auto it = ctx.begin();
-        auto end = ctx.end();
+        const auto * it = ctx.begin();
+        const auto * end = ctx.end();

        /// Only support {}.
        if (it != end && *it != '}')
--- a/base/glibc-compatibility/memcpy/memcpy.h
+++ b/base/glibc-compatibility/memcpy/memcpy.h
@ -63,7 +63,7 @@
  * Very large size of memcpy typically indicates suboptimal (not cache friendly) algorithms in code or unrealistic scenarios,
  * so we don't pay attention to using non-temporary stores.
  *
-  * On recent Intel CPUs, the presence of "erms" makes "rep movsb" the most benefitial,
+  * On recent Intel CPUs, the presence of "erms" makes "rep movsb" the most beneficial,
  * even compared to non-temporal aligned unrolled stores, even with the widest registers.
  *
  * memcpy can be written in asm, C or C++. The latter can also use inline asm.
@ -214,4 +214,3 @@ tail:

    return ret;
 }
-
--- a/base/pcg-random/pcg_extras.hpp
+++ b/base/pcg-random/pcg_extras.hpp
@ -49,6 +49,8 @@
    #include <cxxabi.h>
 #endif

+// NOLINTBEGIN(readability-identifier-naming, modernize-use-using, bugprone-macro-parentheses, google-explicit-constructor)
+
 /*
 * Abstractions for compiler-specific directives
 */
@ -90,8 +92,6 @@
    #define PCG_EMULATED_128BIT_MATH 1
 #endif

-// NOLINTBEGIN(*)
-
 namespace pcg_extras {

 /*
@ -553,6 +553,6 @@ std::ostream& operator<<(std::ostream& out, printable_typename<T>) {

 } // namespace pcg_extras

-// NOLINTEND(*)
+// NOLINTEND(readability-identifier-naming, modernize-use-using, bugprone-macro-parentheses, google-explicit-constructor)

 #endif // PCG_EXTRAS_HPP_INCLUDED
--- a/base/pcg-random/pcg_random.hpp
+++ b/base/pcg-random/pcg_random.hpp
@ -101,7 +101,7 @@
 #endif

 /*
- * The pcg_extras namespace contains some support code that is likley to
+ * The pcg_extras namespace contains some support code that is likely to
 * be useful for a variety of RNGs, including:
 *      - 128-bit int support for platforms where it isn't available natively
 *      - bit twiddling operations
--- a/base/pcg-random/pcg_uint128.hpp
+++ b/base/pcg-random/pcg_uint128.hpp
@ -22,7 +22,7 @@
 /*
 * This code provides a C++ class that can provide 128-bit (or higher)
 * integers.  To produce 2K-bit integers, it uses two K-bit integers,
- * placed in a union that allowes the code to also see them as four K/2 bit
+ * placed in a union that allows the code to also see them as four K/2 bit
 * integers (and access them either directly by name, or by index).
 *
 * It may seem like we're reinventing the wheel here, because several
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@ -1 +1 @@
-Subproject commit 33f60f961d4914441b684af43e9e5535078ba54b
+Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc
--- a/contrib/datasketches-cpp
+++ b/contrib/datasketches-cpp
@ -1 +1 @@
-Subproject commit 7d73d7610db31d4e1ecde0fb3a7ee90ef371207f
+Subproject commit 7abd49bb2e72bf9a5029993d31dcb1872da88292
--- a/contrib/libcxx-cmake/CMakeLists.txt
+++ b/contrib/libcxx-cmake/CMakeLists.txt
@ -54,9 +54,8 @@ set(SRCS
 add_library(cxx ${SRCS})
 set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")

-target_include_directories(cxx SYSTEM BEFORE PUBLIC
-        $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>
-        $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}>/src)
+target_include_directories(cxx SYSTEM BEFORE PRIVATE $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/src>)
+target_include_directories(cxx SYSTEM BEFORE PUBLIC  $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
 target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)

 # Enable capturing stack traces for all exceptions.
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -83,5 +83,8 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
        --yes --no-install-recommends \
    && apt-get clean

+# for external_symbolizer_path
+RUN ln -s /usr/bin/llvm-symbolizer-15 /usr/bin/llvm-symbolizer
+
 COPY build.sh /
 CMD ["bash", "-c", "/build.sh 2>&1"]
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@ -31,9 +31,6 @@ ARG deb_location_url=""

 # set non-empty single_binary_location_url to create docker image
 # from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
-# for example (run on aarch64 server):
-# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
-# note: clickhouse-odbc-bridge is not supported there.
 ARG single_binary_location_url=""

 # user/group precreated explicitly with fixed uid/gid on purpose.
--- a/docker/server/entrypoint.sh
+++ b/docker/server/entrypoint.sh
@ -37,7 +37,6 @@ if [ -n "$ERROR_LOG_PATH" ]; then ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH")";
 FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"

 # There could be many disks declared in config
-readarray -t FILESYSTEM_CACHE_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.data_cache_path' || true)
 readarray -t DISKS_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.path' || true)

 CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
@ -51,7 +50,6 @@ for dir in "$DATA_DIR" \
  "$TMP_DIR" \
  "$USER_PATH" \
  "$FORMAT_SCHEMA_PATH" \
-  "${FILESYSTEM_CACHE_PATHS[@]}" \
  "${DISKS_PATHS[@]}"
 do
    # check if variable not empty
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@ -17,7 +17,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    python3-pip \
    shellcheck \
    yamllint \
-    && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2 \
+    && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory PyGithub unidiff pylint==2.6.2 \
    && apt-get clean \
    && rm -rf /root/.cache/pip

--- a/docs/changelogs/v22.8.4.7-lts.md
+++ b/docs/changelogs/v22.8.4.7-lts.md
@ -0,0 +1,18 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.8.4.7-lts (baad27bcd2f) FIXME as compared to v22.8.3.13-lts (6a15b73faea)
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#40760](https://github.com/ClickHouse/ClickHouse/issues/40760): Fix possible error 'Decimal math overflow' while parsing DateTime64. [#40546](https://github.com/ClickHouse/ClickHouse/pull/40546) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#40811](https://github.com/ClickHouse/ClickHouse/issues/40811): In [#40595](https://github.com/ClickHouse/ClickHouse/issues/40595) it was reported that the `host_regexp` functionality was not working properly with a name to address resolution in `/etc/hosts`. It's fixed. [#40769](https://github.com/ClickHouse/ClickHouse/pull/40769) ([Arthur Passos](https://github.com/arthurpassos)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
--- a/docs/en/development/build-osx.md
+++ b/docs/en/development/build-osx.md
@ -37,7 +37,7 @@ sudo xcode-select --install

 ``` bash
 brew update
-brew install cmake ninja libtool gettext llvm gcc binutils grep findutils
+brew install ccache cmake ninja libtool gettext llvm gcc binutils grep findutils
 ```

 ## Checkout ClickHouse Sources {#checkout-clickhouse-sources}
--- a/docs/en/engines/database-engines/replicated.md
+++ b/docs/en/engines/database-engines/replicated.md
@ -12,7 +12,7 @@ One ClickHouse server can have multiple replicated databases running and updatin

 ## Creating a Database {#creating-a-database}
 ``` sql
-    CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
+CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
 ```

 **Engine Parameters**
@ -21,9 +21,7 @@ One ClickHouse server can have multiple replicated databases running and updatin
 -   `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`.
 -   `replica_name` — Replica name. Replica names must be different for all replicas of the same shard.

-:::warning
 For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database.
-:::

 ## Specifics and Recommendations {#specifics-and-recommendations}

--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@ -16,12 +16,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
    ...
-) ENGINE = EmbeddedRocksDB([ttl]) PRIMARY KEY(primary_key_name)
+) ENGINE = EmbeddedRocksDB([ttl, rocksdb_dir, read_only]) PRIMARY KEY(primary_key_name)
 ```

 Engine parameters:

 - `ttl` - time to live for values. TTL is accepted in seconds. If TTL is 0, regular RocksDB instance is used (without TTL).
+- `rocksdb_dir` - path to the directory of an existing RocksDB instance, or the destination path where a new RocksDB instance is created. The table is opened with the specified `rocksdb_dir`.
+- `read_only` - when `read_only` is set to true, read-only mode is used. For storage with TTL, compaction will not be triggered (neither manual nor automatic), so no expired entries are removed.
 - `primary_key_name` – any column name in the column list.
 - `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
 - columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
--- a/docs/en/engines/table-engines/special/generate.md
+++ b/docs/en/engines/table-engines/special/generate.md
@ -15,7 +15,7 @@ Usage examples:
 ## Usage in ClickHouse Server {#usage-in-clickhouse-server}

 ``` sql
-ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length)
+ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
 ```

 The `max_array_length` and `max_string_length` parameters specify maximum length of all
--- a/docs/en/getting-started/example-datasets/cell-towers.md
+++ b/docs/en/getting-started/example-datasets/cell-towers.md
@ -13,7 +13,7 @@ OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4

 ## Get the Dataset {#get-the-dataset}

-1. Download the snapshot of the dataset from February 2021: [https://datasets.clickhouse.com/cell_towers.csv.xz] (729 MB).
+1. Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB).

 2. Validate the integrity (optional step):
 ```
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@ -175,6 +175,10 @@ You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP
 - `br`
 - `deflate`
 - `xz`
+- `zstd`
+- `lz4`
+- `bz2`
+- `snappy`

 To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`.
 In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods.
--- a/docs/en/operations/access-rights.md
+++ b/docs/en/operations/access-rights.md
@ -151,4 +151,3 @@ Management queries:

    By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.

-[Original article](https://clickhouse.com/docs/en/operations/access_rights/) <!--hide-->
--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/backup.md
@ -1,10 +1,10 @@
 ---
 slug: /en/operations/backup
 sidebar_position: 49
-sidebar_label: Data Backup
+sidebar_label: Data backup and restore
 ---

-# Data Backup
+# Data backup and restore

 While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.

@ -16,21 +16,181 @@ Each company has different resources available and business requirements, so the
 Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
 :::

-## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else}
+## Configure a backup destination
+
+In the examples below you will see the backup destination specified like `Disk('backups', '1.zip')`.  To prepare the destination, add a file at `/etc/clickhouse-server/config.d/backup_disk.xml` specifying the backup destination.  For example, this file defines a disk named `backups` and then adds that disk to the **backups > allowed_disk** list:
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+<!--highlight-next-line -->
+            <backups>
+                <type>local</type>
+                <path>/backups/</path>
+            </backups>
+        </disks>
+    </storage_configuration>
+<!--highlight-start -->
+    <backups>
+        <allowed_disk>backups</allowed_disk>
+        <allowed_path>/backups/</allowed_path>
+    </backups>
+<!--highlight-end -->
+</clickhouse>
+```
+
+## Parameters
+
+Backups can be either full or incremental, and can include tables (including materialized views, projections, and dictionaries), and databases.  Backups can be synchronous (default) or asynchronous.  They can be compressed.  Backups can be password protected.
+
+The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a destination (or source), options and settings:
+- The destination for the backup, or the source for the restore.  This is based on the disk defined earlier.  For example `Disk('backups', 'filename.zip')`
+- ASYNC: backup or restore asynchronously
+- PARTITIONS: a list of partitions to restore
+- SETTINGS:
+    - [`compression_method`](en/sql-reference/statements/create/table/#column-compression-codecs) and `compression_level`
+    - `password` for the file on disk
+    - `base_backup`: the destination of the previous backup of this source.  For example, `Disk('backups', '1.zip')` 
+
+## Usage examples
+
+Backup and then restore a table:
+```
+BACKUP TABLE test.table TO Disk('backups', '1.zip')
+```
+
+Corresponding restore:
+```
+RESTORE TABLE test.table FROM Disk('backups', '1.zip')
+```
+
+:::note
+The above RESTORE would fail if the table `test.table` contains data. To test the RESTORE, you would have to drop the table first, or use the setting `allow_non_empty_tables=true`:
+```
+RESTORE TABLE test.table FROM Disk('backups', '1.zip') 
+SETTINGS allow_non_empty_tables=true
+```
+:::
+
+Tables can be restored, or backed up, with new names:
+```
+RESTORE TABLE test.table AS test.table2 FROM Disk('backups', '1.zip')
+```
+
+```
+BACKUP TABLE test.table3 AS test.table4 TO Disk('backups', '2.zip')
+```
+
+## Incremental backups
+
+Incremental backups can be taken by specifying the `base_backup`.
+:::note
+Incremental backups depend on the base backup.  The base backup must be kept available in order to be able to restore from an incremental backup.
+:::
+
+Incrementally store new data. The setting `base_backup` causes data since a previous backup to `Disk('backups', 'd.zip')` to be stored to `Disk('backups', 'incremental-a.zip')`:
+```
+BACKUP TABLE test.table TO Disk('backups', 'incremental-a.zip')
+  SETTINGS base_backup = Disk('backups', 'd.zip')
+```
+
+Restore all data from the incremental backup and the base_backup into a new table `test.table2`:
+```
+RESTORE TABLE test.table AS test.table2 
+  FROM Disk('backups', 'incremental-a.zip');
+```
+
+## Assign a password to the backup
+
+Backups written to disk can have a password applied to the file:
+```
+BACKUP TABLE test.table
+  TO Disk('backups', 'password-protected.zip')
+  SETTINGS password='qwerty'
+```
+
+Restore:
+```
+RESTORE TABLE test.table
+  FROM Disk('backups', 'password-protected.zip')
+  SETTINGS password='qwerty'
+```
+
+## Compression settings
+
+If you would like to specify the compression method or level:
+```
+BACKUP TABLE test.table
+  TO Disk('backups', 'filename.zip')
+  SETTINGS compression_method='lzma', compression_level=3
+```
+
+## Restore specific partitions
+If specific partitions associated with a table need to be restored, these can be specified.  To restore partitions 2 and 3 from backup:
+```
+RESTORE TABLE test.table PARTITIONS '2', '3'
+  FROM Disk('backups', 'filename.zip')
+```
+
+## Check the status of backups
+
+The backup command returns an `id` and `status`, and that `id` can be used to get the status of the backup.  This is very useful to check the progress of long ASYNC backups.  The example below shows a failure that happened when trying to overwrite an existing backup file:
+```sql
+BACKUP TABLE helloworld.my_first_table TO Disk('backups', '1.zip') ASYNC
+```
+```response
+┌─id───────────────────────────────────┬─status──────────┐
+│ 7678b0b3-f519-4e6e-811f-5a0781a4eb52 │ CREATING_BACKUP │
+└──────────────────────────────────────┴─────────────────┘
+
+1 row in set. Elapsed: 0.001 sec.
+```
+
+```
+SELECT
+    *
+FROM system.backups
+where id='7678b0b3-f519-4e6e-811f-5a0781a4eb52'
+FORMAT Vertical
+```
+```response
+Row 1:
+──────
+id:                7678b0b3-f519-4e6e-811f-5a0781a4eb52
+name:              Disk('backups', '1.zip')
+#highlight-next-line
+status:            BACKUP_FAILED
+num_files:         0
+uncompressed_size: 0
+compressed_size:   0
+#highlight-next-line
+error:             Code: 598. DB::Exception: Backup Disk('backups', '1.zip') already exists. (BACKUP_ALREADY_EXISTS) (version 22.8.2.11 (official build))
+start_time:        2022-08-30 09:21:46
+end_time:          2022-08-30 09:21:46
+
+1 row in set. Elapsed: 0.002 sec.
+```
+
+## Alternatives
+
+ClickHouse stores data on disk, and there are many ways to back up disks.  These are some alternatives that have been used in the past, and that may fit in well in your environment.
+
+### Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else}

 Often data that is ingested into ClickHouse is delivered through some sort of persistent queue, such as [Apache Kafka](https://kafka.apache.org). In this case it is possible to configure an additional set of subscribers that will read the same data stream while it is being written to ClickHouse and store it in cold storage somewhere. Most companies already have some default recommended cold storage, which could be an object store or a distributed filesystem like [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html).

-## Filesystem Snapshots {#filesystem-snapshots}
+### Filesystem Snapshots {#filesystem-snapshots}

 Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective.

-## clickhouse-copier {#clickhouse-copier}
+### clickhouse-copier {#clickhouse-copier}

 [clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.

 For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well.

-## Manipulations with Parts {#manipulations-with-parts}
+### Manipulations with Parts {#manipulations-with-parts}

 ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks to the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by ClickHouse server, so you can just leave them there: you will have a simple backup that does not require any additional external system, but it will still be prone to hardware issues. For this reason, it’s better to remotely copy them to another location and then remove the local copies. Distributed filesystems and object stores are still a good options for this, but normal attached file servers with a large enough capacity might work as well (in this case the transfer will occur via the network filesystem or maybe [rsync](https://en.wikipedia.org/wiki/Rsync)).
 Data can be restored from backup using the `ALTER TABLE ... ATTACH PARTITION ...`
@ -39,4 +199,3 @@ For more information about queries related to partition manipulations, see the [

 A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).

-[Original article](https://clickhouse.com/docs/en/operations/backup/) <!--hide-->
--- a/docs/en/operations/quotas.md
+++ b/docs/en/operations/quotas.md
@ -2,10 +2,9 @@
 slug: /en/operations/quotas
 sidebar_position: 51
 sidebar_label: Quotas
+title: Quotas
 ---

-# Quotas
-
 Quotas allow you to limit resource usage over a period of time or track the use of resources.
 Quotas are set up in the user config, which is usually ‘users.xml’.

@ -118,4 +117,3 @@ For distributed query processing, the accumulated amounts are stored on the requ

 When the server is restarted, quotas are reset.

-[Original article](https://clickhouse.com/docs/en/operations/quotas/) <!--hide-->
--- a/docs/en/sql-reference/data-types/decimal.md
+++ b/docs/en/sql-reference/data-types/decimal.md
@ -46,7 +46,7 @@ Binary operations on Decimal result in wider result type (with any order of argu
 Rules for scale:

 -   add, subtract: S = max(S1, S2).
-   multuply: S = S1 + S2.
+-   multiply: S = S1 + S2.
 -   divide: S = S1.

 For similar operations between Decimal and integers, the result is Decimal of the same size as an argument.
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@ -94,6 +94,21 @@ It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to onl
 -   If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value.
 -   If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value.
 -   If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time.
+    - By default, this `WHERE` condition is checked at the highest level of the SQL query. Alternatively, the condition can be checked in any other `WHERE` clause within the query using the `{condition}` keyword. Example:
+    ```sql
+    ...
+    SOURCE(CLICKHOUSE(... 
+        update_field 'added_time' 
+        QUERY '
+            SELECT my_arr.1 AS x, my_arr.2 AS y, creation_time 
+            FROM (
+                SELECT arrayZip(x_arr, y_arr) AS my_arr, creation_time 
+                FROM dictionary_source
+                WHERE {condition}
+            )'
+    ))
+    ...
+    ```

 If `update_field` option is set, additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data.

--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -267,7 +267,7 @@ Result:
 └────────────────┘
 ```

-:::Attention
+:::note
 The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday` functions described below is `Date` or `DateTime`.
 Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
 In case argument is out of normal range:
@ -1069,7 +1069,7 @@ Formats a Time according to the given Format string. Format is a constant expres
 **Syntax**

 ``` sql
-formatDateTime(Time, Format\[, Timezone\])
+formatDateTime(Time, Format[, Timezone])
 ```

 **Returned value(s)**
@ -1105,6 +1105,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
 | %w       | weekday as a decimal number with Sunday as 0 (0-6)      | 2          |
 | %y       | Year, last two digits (00-99)                           | 18         |
 | %Y       | Year                                                    | 2018       |
+| %z       | Time offset from UTC as +HHMM or -HHMM                  | -0500      |
 | %%       | a % sign                                                | %          |

 **Example**
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@ -495,25 +495,23 @@ If the ‘s’ string is non-empty and does not contain the ‘c’ character at

 Returns the string ‘s’ that was converted from the encoding in ‘from’ to the encoding in ‘to’.

-## base58Encode(plaintext), base58Decode(encoded_text)
+## base58Encode(plaintext)

-Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet.
+Accepts a String and encodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet.

 **Syntax**

 ```sql
-base58Encode(decoded)
-base58Decode(encoded)
+base58Encode(plaintext)
 ```

 **Arguments**

- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant.
- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown.
+- `plaintext` — [String](../../sql-reference/data-types/string.md) column or constant.

 **Returned value**

-   A string containing encoded/decoded value of 1st argument.
+-   A string containing encoded value of 1st argument.

 Type: [String](../../sql-reference/data-types/string.md).

@ -523,17 +521,48 @@ Query:

 ``` sql
 SELECT base58Encode('Encoded');
+```
+
+Result:
+```text
+┌─base58Encode('Encoded')─┐
+│ 3dc8KtHrwM              │
+└─────────────────────────┘
+```
+
+## base58Decode(encoded_text)
+
+Accepts a String and decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet.
+
+**Syntax**
+
+```sql
+base58Decode(encoded_text)
+```
+
+**Arguments**
+
+- `encoded_text` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown.
+
+**Returned value**
+
+-   A string containing decoded value of 1st argument.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+Query:
+
+``` sql
 SELECT base58Decode('3dc8KtHrwM');
 ```

 Result:
 ```text
-┌─encodeBase58('Encoded')─┐
-│ 3dc8KtHrwM                         │
-└──────────────────────────────────┘
-┌─decodeBase58('3dc8KtHrwM')─┐
+┌─base58Decode('3dc8KtHrwM')─┐
 │ Encoded                    │
-└────────────────────────────────────┘
+└────────────────────────────┘
 ```

 ## base64Encode(s)
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@ -430,5 +430,119 @@ Result:
 └────────────────────────────┘
 ```  
  
+## mapApply  
+  
+**Syntax**
+
+```sql
+mapApply(func, map)
+```  
+  
+**Parameters**
+  
+-   `func`  - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+-   `map` — [Map](../../sql-reference/data-types/map.md).
+
+**Returned value**
+
+- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element.
+  
+**Example**
+
+Query:
+
+```sql
+SELECT mapApply((k, v) -> (k, v * 10), _map) AS r
+FROM
+(
+    SELECT map('key1', number, 'key2', number * 2) AS _map
+    FROM numbers(3)
+)
+```  
+  
+Result:  
+  
+```text
+┌─r─────────────────────┐
+│ {'key1':0,'key2':0}   │
+│ {'key1':10,'key2':20} │
+│ {'key1':20,'key2':40} │
+└───────────────────────┘
+```  
+
+## mapFilter  
+  
+**Syntax**
+
+```sql
+mapFilter(func, map)
+```  
+  
+**Parameters**
+
+-   `func`  - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+-   `map` — [Map](../../sql-reference/data-types/map.md).  
+
+**Returned value**
+
+- Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0.
+  
+  
+**Example**
+
+Query:
+
+```sql
+SELECT mapFilter((k, v) -> ((v % 2) = 0), _map) AS r
+FROM
+(
+    SELECT map('key1', number, 'key2', number * 2) AS _map
+    FROM numbers(3)
+)
+```  
+  
+Result:  
+  
+```text
+┌─r───────────────────┐
+│ {'key1':0,'key2':0} │
+│ {'key2':2}          │
+│ {'key1':2,'key2':4} │
+└─────────────────────┘
+```  
+
+
+## mapUpdate  
+  
+**Syntax**
+
+```sql
+mapUpdate(map1, map2)
+```  
+  
+**Parameters**
+
+-   `map1` — [Map](../../sql-reference/data-types/map.md).
+-   `map2` — [Map](../../sql-reference/data-types/map.md).
+
+**Returned value**
+
+- Returns `map1` with its values updated from the values for the corresponding keys in `map2`.
+  
+**Example**
+
+Query:
+
+```sql
+SELECT mapUpdate(map('key1', 0, 'key3', 0), map('key1', 10, 'key2', 10)) AS map;
+```  
+  
+Result:  
+  
+```text
+┌─map────────────────────────────┐
+│ {'key3':0,'key1':10,'key2':10} │
+└────────────────────────────────┘
+```  

 [Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->
--- a/docs/en/sql-reference/functions/uniqtheta-functions.md
+++ b/docs/en/sql-reference/functions/uniqtheta-functions.md
@ -0,0 +1,94 @@
+---
+slug: /en/sql-reference/functions/uniqtheta-functions
+---
+
+# uniqTheta Functions
+
+uniqTheta functions take two uniqThetaSketch objects, perform a set operation on them such as ∪ / ∩ / × (union/intersect/not), and return a new uniqThetaSketch object containing the result.
+
+A uniqThetaSketch object is constructed by the aggregate function uniqTheta with the -State combinator.
+
+UniqThetaSketch is a data structure that stores an approximate set of values.
+For more information on Theta Sketch, see: [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html).
+
+## uniqThetaUnion
+
+Performs a union calculation (set operation ∪) on two uniqThetaSketch objects; the result is a new uniqThetaSketch.
+
+``` sql
+uniqThetaUnion(uniqThetaSketch,uniqThetaSketch)
+```
+
+**Arguments**
+
+-   `uniqThetaSketch` – uniqThetaSketch object.
+
+**Example**
+
+``` sql
+select finalizeAggregation(uniqThetaUnion(a, b)) as a_union_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality
+from
+(select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b );
+```
+
+``` text
+┌─a_union_b─┬─a_cardinality─┬─b_cardinality─┐
+│         4 │             2 │             3 │
+└───────────┴───────────────┴───────────────┘
+```
+
+## uniqThetaIntersect
+
+Performs an intersect calculation (set operation ∩) on two uniqThetaSketch objects; the result is a new uniqThetaSketch.
+
+``` sql
+uniqThetaIntersect(uniqThetaSketch,uniqThetaSketch)
+```
+
+**Arguments**
+
+-   `uniqThetaSketch` – uniqThetaSketch object.
+
+**Example**
+
+``` sql
+select finalizeAggregation(uniqThetaIntersect(a, b)) as a_intersect_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality
+from
+(select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b );
+```
+
+``` text
+┌─a_intersect_b─┬─a_cardinality─┬─b_cardinality─┐
+│             1 │             2 │             3 │
+└───────────────┴───────────────┴───────────────┘
+```
+
+## uniqThetaNot
+
+Performs an a_not_b calculation (set operation ×) on two uniqThetaSketch objects; the result is a new uniqThetaSketch.
+
+``` sql
+uniqThetaNot(uniqThetaSketch,uniqThetaSketch)
+```
+
+**Arguments**
+
+-   `uniqThetaSketch` – uniqThetaSketch object.
+
+**Example**
+
+``` sql
+select finalizeAggregation(uniqThetaNot(a, b)) as a_not_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality
+from
+(select arrayReduce('uniqThetaState',[2,3,4]) as a, arrayReduce('uniqThetaState',[1,2]) as b );
+```
+
+``` text
+┌─a_not_b─┬─a_cardinality─┬─b_cardinality─┐
+│       2 │             3 │             2 │
+└─────────┴───────────────┴───────────────┘
+```
+
+**See Also**
+
+-   [uniqThetaSketch](../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)
--- a/docs/en/sql-reference/statements/alter/constraint.md
+++ b/docs/en/sql-reference/statements/alter/constraint.md
@ -9,8 +9,8 @@ sidebar_label: CONSTRAINT
 Constraints could be added or deleted using following syntax:

 ``` sql
-ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression;
-ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
+ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression;
+ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name;
 ```

 See more on [constraints](../../../sql-reference/statements/create/table.md#constraints).
--- a/docs/en/sql-reference/statements/alter/ttl.md
+++ b/docs/en/sql-reference/statements/alter/ttl.md
@ -11,7 +11,7 @@ sidebar_label: TTL
 You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form:

 ``` sql
-ALTER TABLE table_name MODIFY TTL ttl_expression;
+ALTER TABLE [db.]table_name [ON CLUSTER cluster] MODIFY TTL ttl_expression;
 ```

 ## REMOVE TTL
@ -19,7 +19,7 @@ ALTER TABLE table_name MODIFY TTL ttl_expression;
 TTL-property can be removed from table with the following query:

 ```sql
-ALTER TABLE table_name REMOVE TTL
+ALTER TABLE [db.]table_name [ON CLUSTER cluster] REMOVE TTL
 ```

 **Example**
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@ -303,7 +303,7 @@ SHOW USERS

 ## SHOW ROLES

-Returns a list of [roles](../../operations/access-rights.md#role-management). To view another parameters, see system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role-grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
+Returns a list of [roles](../../operations/access-rights.md#role-management). To view other parameters, see system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).

 ### Syntax

--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@ -267,7 +267,7 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
 └────────────────┘
 ```

-:::Attention
+:::note
 Тип возвращаемого описанными далее функциями `toStartOf*`, `toMonday` значения - `Date` или `DateTime`.
 Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат.
 Возвращаемые значения для значений вне нормального диапазона:
@ -277,7 +277,7 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
 * `2149-05-31` будет результатом функции `toLastDayOfMonth` при обработке аргумента больше `2149-05-31`.
 :::

-:::Attention
+:::note
 Тип возвращаемого описанными далее функциями `toStartOf*`, `toLastDayOfMonth`, `toMonday` значения - `Date` или `DateTime`.
 Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат.
 Возвращаемые значения для значений вне нормального диапазона:
@ -1017,7 +1017,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
 **Синтаксис**

 ``` sql
-formatDateTime(Time, Format\[, Timezone\])
+formatDateTime(Time, Format[, Timezone])
 ```

 **Возвращаемое значение**
--- a/docs/ru/sql-reference/statements/alter/constraint.md
+++ b/docs/ru/sql-reference/statements/alter/constraint.md
@ -11,8 +11,8 @@ sidebar_label: "Манипуляции с ограничениями"
 Добавить или удалить ограничение можно с помощью запросов

 ``` sql
-ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression;
-ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
+ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression;
+ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name;
 ```

 Запросы выполняют добавление или удаление метаданных об ограничениях таблицы `[db].name`, поэтому выполняются мгновенно.
--- a/docs/ru/sql-reference/statements/alter/ttl.md
+++ b/docs/ru/sql-reference/statements/alter/ttl.md
@ -11,7 +11,7 @@ sidebar_label: TTL
 Вы можете изменить [TTL для таблицы](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl) запросом следующего вида:

 ``` sql
-ALTER TABLE table-name MODIFY TTL ttl-expression
+ALTER TABLE [db.]table-name [ON CLUSTER cluster] MODIFY TTL ttl-expression
 ```

 ## REMOVE TTL {#remove-ttl}
@ -19,7 +19,7 @@ ALTER TABLE table-name MODIFY TTL ttl-expression
 Удалить табличный TTL можно запросом следующего вида:

 ```sql
-ALTER TABLE table_name REMOVE TTL
+ALTER TABLE [db.]table_name [ON CLUSTER cluster] REMOVE TTL
 ```

 **Пример**
--- a/docs/ru/sql-reference/statements/show.md
+++ b/docs/ru/sql-reference/statements/show.md
@ -305,7 +305,7 @@ SHOW USERS

 ## SHOW ROLES {#show-roles-statement}

-Выводит список [ролей](../../operations/access-rights.md#role-management). Для просмотра параметров ролей, см. системные таблицы [system.roles](../../operations/system-tables/roles.md#system_tables-roles) и [system.role-grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
+Выводит список [ролей](../../operations/access-rights.md#role-management). Для просмотра параметров ролей, см. системные таблицы [system.roles](../../operations/system-tables/roles.md#system_tables-roles) и [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).

 ### Синтаксис {#show-roles-syntax}

--- a/docs/zh/development/tests.md
+++ b/docs/zh/development/tests.md
@ -1,338 +1,297 @@
 ---
-slug: /zh/development/tests
+slug: /en/development/tests
+sidebar_position: 70
+sidebar_label: Testing
+title: ClickHouse Testing
+description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
 ---
-# ClickHouse 测试 {#clickhouse-testing}

-## 功能测试 {#functional-tests}
+## Functional Tests

-功能测试使用起来最简单方便. 大多数 ClickHouse 特性都可以通过功能测试进行测试, 并且对于可以通过功能测试进行测试的 ClickHouse 代码的每一个更改, 都必须使用这些特性
+Functional tests are the most simple and convenient to use. Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.

-每个功能测试都会向正在运行的 ClickHouse 服务器发送一个或多个查询, 并将结果与参考进行比较.
+Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference.

-测试位于 `查询` 目录中. 有两个子目录: `无状态` 和 `有状态`. 无状态测试在没有任何预加载测试数据的情况下运行查询 - 它们通常在测试本身内即时创建小型合成数据集. 状态测试需要来自 Yandex.Metrica 的预加载测试数据, 它对公众开放.
+Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public.

-每个测试可以是两种类型之一: `.sql` 和 `.sh`. `.sql` 测试是简单的 SQL 脚本, 它通过管道传输到  `clickhouse-client --multiquery --testmode`. `.sh` 测试是一个自己运行的脚本. SQL 测试通常比 `.sh` 测试更可取. 仅当您必须测试某些无法从纯 SQL 中执行的功能时才应使用 `.sh` 测试, 例如将一些输入数据传送到 `clickhouse-client` 或测试 `clickhouse-local`.
+Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.

-### 在本地运行测试 {#functional-test-locally}
+### Running a Test Locally {#functional-test-locally}

-在本地启动ClickHouse服务器, 监听默认端口(9000). 例如, 要运行测试 `01428_hash_set_nan_key`, 请切换到存储库文件夹并运行以下命令:
+Start the ClickHouse server locally, listening on the default port (9000). To
+run, for example, the test `01428_hash_set_nan_key`, change to the repository
+folder and run the following command:

 ```
 PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
 ```

-有关更多选项, 请参阅`tests/clickhouse-test --help`. 您可以简单地运行所有测试或运行由测试名称中的子字符串过滤的测试子集：`./clickhouse-test substring`. 还有并行或随机顺序运行测试的选项.
+For more options, see `tests/clickhouse-test --help`. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. There are also options to run tests in parallel or in randomized order.

-### 添加新测试 {#adding-new-test}
+### Adding a New Test

-添加新的测试, 在 `queries/0_stateless` 目录下创建 `.sql` 或 `.sh` 文件, 手动检查, 然后通过以下方式生成`.reference`文件：`clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` 或 `./00000_test.sh > ./00000_test.reference`.
+To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.

-测试应仅使用(创建、删除等)`test` 数据库中假定已预先创建的表; 测试也可以使用临时表.
+Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.

-### 选择测试名称 {#choosing-test-name}
+### Choosing the Test Name

-测试名称以五位数前缀开头, 后跟描述性名称, 例如 `00422_hash_function_constexpr.sql`. 要选择前缀, 请找到目录中已存在的最大前缀, 并将其加一. 在此期间, 可能会添加一些具有相同数字前缀的其他测试, 但这没关系并且不会导致任何问题, 您以后不必更改它.
+The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.

-一些测试的名称中标有 `zookeeper`、`shard` 或 `long` . `zookeeper` 用于使用 ZooKeeper 的测试. `shard` 用于需要服务器监听 `127.0.0.*` 的测试; `distributed` 或 `global` 具有相同的含义. `long` 用于运行时间稍长于一秒的测试. Yo你可以分别使用 `--no-zookeeper`、`--no-shard` 和 `--no-long` 选项禁用这些测试组. 如果需要 ZooKeeper 或分布式查询，请确保为您的测试名称添加适当的前缀.
+Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that require the server to listen on `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries.

-### 检查必须发生的错误 {#checking-error-must-occur}
+### Checking for an Error that Must Occur

-有时您想测试是否因不正确的查询而发生服务器错误. 我们支持在 SQL 测试中对此进行特殊注释, 形式如下:
+Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form:
 ```
 select x; -- { serverError 49 }
 ```
-此测试确保服务器返回关于未知列“x”的错误代码为 49. 如果没有错误, 或者错误不同, 则测试失败. 如果您想确保错误发生在客户端, 请改用 `clientError` 注释.
+This test ensures that the server returns an error with code 49 about unknown column `x`. If there is no error, or the error is different, the test will fail. If you want to ensure that an error occurs on the client side, use `clientError` annotation instead.

-不要检查错误消息的特定措辞, 它将来可能会发生变化, 并且测试将不必要地中断. 只检查错误代码. 如果现有的错误代码不足以满足您的需求, 请考虑添加一个新的.
+Do not check for a particular wording of error message, it may change in the future, and the test will needlessly break. Check only the error code. If the existing error code is not precise enough for your needs, consider adding a new one.

-### 测试分布式查询 {#testing-distributed-query}
+### Testing a Distributed Query

-如果你想在功能测试中使用分布式查询, 你可以使用 `127.0.0.{1..2}` 的地址, 以便服务器查询自己; 或者您可以在服务器配置文件中使用预定义的测试集群, 例如`test_shard_localhost`. 请记住在测试名称中添加 `shard` 或 `distributed` 字样, 以便它以正确的配置在 CI 中运行, 其中服务器配置为支持分布式查询.
+If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries.


-## 已知错误 {#known-bugs}
+## Known Bugs {#known-bugs}

-如果我们知道一些可以通过功能测试轻松重现的错误, 我们将准备好的功能测试放在 `tests/queries/bugs` 目录中. 修复错误后, 这些测试将移至 `tests/queries/0_stateless` .
+If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `tests/queries/bugs` directory. These tests will be moved to `tests/queries/0_stateless` when bugs are fixed.

-## 集成测试 {#integration-tests}
+## Integration Tests {#integration-tests}

-集成测试允许在集群配置中测试 ClickHouse 以及 ClickHouse 与其他服务器(如 MySQL、Postgres、MongoDB)的交互. 它们可以用来模拟网络分裂、丢包等情况. 这些测试在Docker下运行, 并使用各种软件创建多个容器.
+Integration tests allow testing ClickHouse in clustered configuration and ClickHouse interaction with other servers like MySQL, Postgres, MongoDB. They are useful to emulate network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software.

-有关如何运行这些测试, 请参阅 `tests/integration/README.md` .
+See `tests/integration/README.md` on how to run these tests.

-注意, ClickHouse与第三方驱动程序的集成没有经过测试. 另外, 我们目前还没有JDBC和ODBC驱动程序的集成测试.
+Note that integration of ClickHouse with third-party drivers is not tested. Also, we currently do not have integration tests with our JDBC and ODBC drivers.

-## 单元测试 {#unit-tests}
+## Unit Tests {#unit-tests}

-当您想测试的不是 ClickHouse 整体, 而是单个独立库或类时，单元测试很有用. 您可以使用 `ENABLE_TESTS` CMake 选项启用或禁用测试构建. 单元测试(和其他测试程序)位于代码中的 `tests` 子目录中. 要运行单元测试, 请键入 `ninja test` 。有些测试使用 `gtest` , 但有些程序在测试失败时会返回非零退出码.
+Unit tests are useful when you want to test not the ClickHouse as a whole, but a single isolated library or class. You can enable or disable build of tests with `ENABLE_TESTS` CMake option. Unit tests (and other test programs) are located in `tests` subdirectories across the code. To run unit tests, type `ninja test`. Some tests use `gtest`, but some are just programs that return non-zero exit code on test failure.

-如果代码已经被功能测试覆盖了, 就没有必要进行单元测试(而且功能测试通常更易于使用).
+It’s not necessary to have unit tests if the code is already covered by functional tests (and functional tests are usually much more simple to use).

-例如, 您可以通过直接调用可执行文件来运行单独的 gtest 检查:
+You can run individual gtest checks by calling the executable directly, for example:

 ```bash
 $ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
 ```

-## 性能测试 {#performance-tests}
+## Performance Tests {#performance-tests}

-性能测试允许测量和比较 ClickHouse 的某些孤立部分在合成查询上的性能. 测试位于 `tests/performance`. 每个测试都由带有测试用例描述的 `.xml` 文件表示. 测试使用 `docker/tests/performance-comparison` 工具运行. 请参阅自述文件以进行调用.
+Performance tests allow measuring and comparing the performance of some isolated part of ClickHouse on synthetic queries. Performance tests are located at `tests/performance/`. Each test is represented by an `.xml` file with a description of the test case. Tests are run with the `docker/test/performance-comparison` tool. See the readme file for invocation.

-每个测试在循环中运行一个或多个查询(可能带有参数组合). 一些测试可以包含预加载测试数据集的先决条件.
+Each test runs one or multiple queries (possibly with combinations of parameters) in a loop.

-如果您希望在某些场景中提高ClickHouse的性能，并且如果可以在简单的查询中观察到改进，那么强烈建议编写性能测试。在测试期间使用 `perf top` 或其他perf工具总是有意义的.
+If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. Also, it is recommended to write performance tests when you add or modify SQL functions which are relatively isolated and not too obscure. It always makes sense to use `perf top` or other `perf` tools during your tests.

-## 测试工具和脚本 {#test-tools-and-scripts}
+## Test Tools and Scripts {#test-tools-and-scripts}

-  `tests` 目录中的一些程序不是准备好的测试，而是测试工具. 例如, 对于 `Lexer`, 有一个工具 `src/Parsers/tests/lexer` , 它只是对标准输入进行标记化并将着色结果写入标准输出. 您可以将这些类型的工具用作代码示例以及用于探索和手动测试.
+Some programs in `tests` directory are not prepared tests, but are test tools. For example, for `Lexer` there is a tool `src/Parsers/tests/lexer` that just does tokenization of stdin and writes colorized result to stdout. You can use these kinds of tools as code examples and for exploration and manual testing.

-## 其他测试 {#miscellaneous-tests}
+## Miscellaneous Tests {#miscellaneous-tests}

-在 `tests/external_models` 中有机器学习模型的测试. 这些测试不会更新, 必须转移到集成测试.
+There are tests for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests.

-仲裁插入有单独的测试. 该测试在不同的服务器上运行 ClickHouse 集群并模拟各种故障情况：网络分裂、丢包(ClickHouse 节点之间、ClickHouse 和 ZooKeeper 之间、ClickHouse 服务器和客户端之间等)、`kill -9`、`kill -STOP` 和 `kill -CONT` , 比如 [Jepsen](https://aphyr.com/tags/Jepsen). 然后测试检查所有已确认的插入是否已写入并且所有被拒绝的插入均未写入.
+There is a separate test for quorum inserts. This test runs a ClickHouse cluster on separate servers and emulates various failure cases: network split, packet drop (between ClickHouse nodes, between ClickHouse and ZooKeeper, between ClickHouse server and client, etc.), `kill -9`, `kill -STOP` and `kill -CONT`, like [Jepsen](https://aphyr.com/tags/Jepsen). Then the test checks that all acknowledged inserts were written and all rejected inserts were not.

-在 ClickHouse 开源之前, Quorum 测试是由单独的团队编写的. 这个团队不再与ClickHouse合作. 测试碰巧是用Java编写的. 由于这些原因, 必须重写仲裁测试并将其转移到集成测试.
+The quorum test was written by a separate team before ClickHouse was open-sourced. This team no longer works with ClickHouse. The test was accidentally written in Java. For these reasons, the quorum test must be rewritten and moved to integration tests.

-## 手动测试 {#manual-testing}
+## Manual Testing {#manual-testing}

-当您开发一个新特性时, 手动测试它也是合理的. 您可以按照以下步骤进行操作:
+When you develop a new feature, it is reasonable to also test it manually. You can do it with the following steps:

-构建 ClickHouse. 从终端运行 ClickHouse：将目录更改为 `programs/clickhouse-server` 并使用 `./clickhouse-server` 运行它.  默认情况下, 它将使用当前目录中的配置(`config.xml`、`users.xml` 和`config.d` 和`users.d` 目录中的文件). 要连接到 ClickHouse 服务器, 请运行 `programs/clickhouse-client/clickhouse-client` .
+Build ClickHouse. Run ClickHouse from the terminal: change directory to `programs/clickhouse-server` and run it with `./clickhouse-server`. It will use configuration (`config.xml`, `users.xml` and files within `config.d` and `users.d` directories) from the current directory by default. To connect to ClickHouse server, run `programs/clickhouse-client/clickhouse-client`.

-请注意, 所有 clickhouse 工具(服务器、客户端等)都只是指向名为 `clickhouse` 的单个二进制文件的符号链接. 你可以在 `programs/clickhouse` 找到这个二进制文件. 所有工具也可以作为 `clickhouse tool` 而不是 `clickhouse-tool` 调用.
+Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`.

-或者, 您可以安装 ClickHouse 包: 从 Yandex 存储库稳定发布, 或者您可以在 ClickHouse 源根目录中使用 `./release` 为自己构建包. 然后使用 `sudo service clickhouse-server start` 启动服务器(或停止以停止服务器). 在 `/etc/clickhouse-server/clickhouse-server.log` 中查找日志.
+Alternatively you can install ClickHouse package: either stable release from ClickHouse repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo clickhouse start` (or use `sudo clickhouse stop` to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`.

-当您的系统上已经安装了 ClickHouse 时，您可以构建一个新的 `clickhouse` 二进制文件并替换现有的二进制文件:
+When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary:

 ``` bash
-$ sudo service clickhouse-server stop
+$ sudo clickhouse stop
 $ sudo cp ./clickhouse /usr/bin/
-$ sudo service clickhouse-server start
+$ sudo clickhouse start
 ```

-您也可以停止系统 clickhouse-server 并使用相同的配置运行您自己的服务器, 但登录到终端:
+Also you can stop system clickhouse-server and run your own with the same configuration but with logging to terminal:

 ``` bash
-$ sudo service clickhouse-server stop
+$ sudo clickhouse stop
 $ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
 ```

-使用 gdb 的示例:
+Example with gdb:

 ``` bash
 $ sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
 ```

-如果系统 clickhouse-server 已经在运行并且你不想停止它, 你可以在你的 `config.xml` 中更改端口号(或在 `config.d` 目录中的文件中覆盖它们), 提供适当的数据路径, 并运行它.
+If the system clickhouse-server is already running and you do not want to stop it, you can change port numbers in your `config.xml` (or override them in a file in `config.d` directory), provide appropriate data path, and run it.

-`clickhouse` 二进制文件几乎没有依赖关系, 可以在广泛的 Linux 发行版中使用. 要在服务器上快速而肮脏地测试您的更改, 您可以简单地将新构建的 `clickhouse` 二进制文件 `scp` 到您的服务器, 然后按照上面的示例运行它.
+The `clickhouse` binary has almost no dependencies and works across a wide range of Linux distributions. For a quick-and-dirty test of your changes on a server, you can simply `scp` your freshly built `clickhouse` binary to your server and then run it as in the examples above.

-## 测试环境 {#testing-environment}
+## Build Tests {#build-tests}

-在发布稳定版之前, 我们将其部署在测试环境中.测试环境是一个集群，处理 [Yandex.Metrica](https://metrica.yandex.com/) 数据的 1/39 部分. 我们与 Yandex.Metrica 团队共享我们的测试环境. ClickHouse无需在现有数据上停机即可升级. 我们首先看到的是, 数据被成功地处理了, 没有滞后于实时, 复制继续工作, Yandex.Metrica 团队没有发现任何问题. 第一次检查可以通过以下方式进行:
+Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well.

-``` sql
-SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
-```
+Examples:
+-   cross-compile for Darwin x86_64 (Mac OS X)
+-   cross-compile for FreeBSD x86_64
+-   cross-compile for Linux AArch64
+-   build on Ubuntu with libraries from system packages (discouraged)
+-   build with shared linking of libraries (discouraged)

-在某些情况下, 我们还会部署到 Yandex 中我们朋友团队的测试环境：Market、Cloud 等. 此外, 我们还有一些用于开发目的的硬件服务器.
+For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.

-## 负载测试 {#load-testing}
+Though we cannot run all tests on all variants of builds, we want to check at least that various build variants are not broken. For this purpose we use build tests.

-部署到测试环境后, 我们使用来自生产集群的查询运行负载测试. 这是手动完成的.
+We also test that there are no translation units that are too long to compile or require too much RAM.

-确保您在生产集群上启用了 `query_log`.
+We also test that there are no too large stack frames.

-收集一天或更长时间的查询日志:
+## Testing for Protocol Compatibility {#testing-for-protocol-compatibility}

-``` bash
-$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
-```
+When we extend ClickHouse network protocol, we test manually that old clickhouse-client works with new clickhouse-server and new clickhouse-client works with old clickhouse-server (simply by running binaries from corresponding packages).

-这是一个复杂的例子. `type = 2` 将过滤成功执行的查询. `query LIKE '%ym:%'` 是从 Yandex.Metrica 中选择相关查询. `is_initial_query` 是只选择客户端发起的查询, 而不是 ClickHouse 本身(作为分布式查询处理的一部分).
+We also test some cases automatically with integration tests:
+- if data written by old version of ClickHouse can be successfully read by the new version;
+- do distributed queries work in a cluster with different ClickHouse versions.

-`scp` 将此日志记录到您的测试集群并按如下方式运行它:
+## Help from the Compiler {#help-from-the-compiler}

-``` bash
-$ clickhouse benchmark --concurrency 16 < queries.tsv
-```
+Main ClickHouse code (that is located in `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional enabled warnings. These options are not enabled for third-party libraries, though.

-(可能你还想指定一个 `--user`)
+Clang has even more useful warnings - you can look for them with `-Weverything` and pick something to default build.

-然后把它留到晚上或周末, 去休息一下.
+For production builds, clang is used, but we also make test builds with gcc. For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang in debug mode, debug version of `libc++` is used that allows catching more errors at runtime.

-您应该检查 `clickhouse-server` 没有崩溃, 内存占用是有限的, 且性能不会随着时间的推移而降低.
+## Sanitizers {#sanitizers}

-由于查询和环境的高度可变性, 没有记录和比较精确的查询执行时间.
+### Address sanitizer
+We run functional, integration, stress and unit tests under ASan on per-commit basis.

-## 构建测试 {#build-tests}
+### Thread sanitizer
+We run functional, integration, stress and unit tests under TSan on per-commit basis.

-构建测试允许检查在各种可选配置和一些外部系统上的构建是否被破坏. 这些测试也是自动化的.
+### Memory sanitizer
+We run functional, integration, stress and unit tests under MSan on per-commit basis.

-示例:
-   Darwin x86_64 (Mac OS X) 交叉编译
-   FreeBSD x86_64 交叉编译
-   Linux AArch64 交叉编译
-   使用系统包中的库在 Ubuntu 上构建（不鼓励）
-   使用库的共享链接构建（不鼓励）
-
-例如, 使用系统包构建是不好的做法, 因为我们无法保证系统将拥有哪个确切版本的包. 但这确实是 Debian 维护者所需要的. 出于这个原因, 我们至少必须支持这种构建变体. 另一个例子: 共享链接是一个常见的麻烦来源, 但对于一些爱好者来说是需要的.
-
-虽然我们无法对所有构建变体运行所有测试, 但我们希望至少检查各种构建变体没有被破坏. 为此, 我们使用构建测试.
-
-我们还测试了那些太长而无法编译或需要太多RAM的没有翻译单元.
-
-我们还测试没有太大的堆栈帧.
-
-## 协议兼容性测试 {#testing-for-protocol-compatibility}
-
-当我们扩展 ClickHouse 网络协议时, 我们手动测试旧的 clickhouse-client 与新的 clickhouse-server 一起工作, 而新的 clickhouse-client 与旧的 clickhouse-server 一起工作(只需从相应的包中运行二进制文件).
-
-我们还使用集成测试自动测试一些案例:
- 旧版本ClickHouse写入的数据是否可以被新版本成功读取;
- 在具有不同 ClickHouse 版本的集群中执行分布式查询.
-
-## 编译器的帮助 {#help-from-the-compiler}
-
-主要的 ClickHouse 代码(位于 `dbms` 目录中)是用 `-Wall -Wextra -Werror` 和一些额外的启用警告构建的. 虽然没有为第三方库启用这些选项.
-
-Clang 有更多有用的警告 - 你可以用 `-Weverything` 寻找它们并选择一些东西来默认构建.
-
-对于生产构建, 使用 clang, 但我们也测试 make gcc 构建. 对于开发, clang 通常使用起来更方便. 您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电池), 但请注意, 由于更好的控制流和过程间分析, 编译器能够使用 `-O3` 生成更多警告. 在调试模式下使用 clang 构建时, 使用调试版本的 `libc++` 允许在运行时捕获更多错误.
-
-## 地址清理器 {#sanitizers}
-
-### 地址清理器
-我们在ASan上运行功能测试、集成测试、压力测试和单元测试.
-
-### 线程清理器
-我们在TSan下运行功能测试、集成测试、压力测试和单元测试.
-
-### 内存清理器
-我们在MSan上运行功能测试、集成测试、压力测试和单元测试.
-
-### 未定义的行为清理器
-我们在UBSan下运行功能测试、集成测试、压力测试和单元测试. 某些第三方库的代码未针对 UB 进行清理.
+### Undefined behaviour sanitizer
+We run functional, integration, stress and unit tests under UBSan on per-commit basis. The code of some third-party libraries is not sanitized for UB.

 ### Valgrind (Memcheck)
-我们曾经在 Valgrind 下通宵运行功能测试, 但不再这样做了. 这需要几个小时. 目前在`re2`库中有一个已知的误报, 见[这篇文章](https://research.swtch.com/sparse).
+We used to run functional tests under Valgrind overnight, but don't do it anymore. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).

-## 模糊测试 {#fuzzing}
+## Fuzzing {#fuzzing}

-ClickHouse 模糊测试是使用 [libFuzzer](https://llvm.org/docs/LibFuzzer.html) 和随机 SQL 查询实现的. 所有模糊测试都应使用sanitizers(地址和未定义)进行.
+ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
+All the fuzz testing should be performed with sanitizers (Address and Undefined).

-LibFuzzer 用于库代码的隔离模糊测试. Fuzzer 作为测试代码的一部分实现, 并具有 `_fuzzer` 名称后缀.
-Fuzzer 示例可以在 `src/Parsers/tests/lexer_fuzzer.cpp` 中找到. LibFuzzer 特定的配置、字典和语料库存储在 `tests/fuzz` 中.
-我们鼓励您为处理用户输入的每个功能编写模糊测试.
+LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “_fuzzer” name postfixes.
+Fuzzer example can be found at `src/Parsers/fuzzers/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
+We encourage you to write fuzz tests for every functionality that handles user input.

-默认情况下不构建模糊器. 要构建模糊器, 应设置` -DENABLE_FUZZING=1` 和 `-DENABLE_TESTS=1` 选项.
-我们建议在构建模糊器时禁用 Jemalloc. 用于将 ClickHouse fuzzing 集成到 Google OSS-Fuzz 的配置可以在 `docker/fuzz` 中找到.
+Fuzzers are not built by default. To build fuzzers both `-DENABLE_FUZZING=1` and `-DENABLE_TESTS=1` options should be set.
+We recommend to disable Jemalloc while building fuzzers. Configuration used to integrate ClickHouse fuzzing to
+Google OSS-Fuzz can be found at `docker/fuzz`.

-我们还使用简单的模糊测试来生成随机SQL查询, 并检查服务器在执行这些查询时是否会死亡.
-你可以在 `00746_sql_fuzzy.pl` 中找到它. 这个测试应该连续运行(通宵或更长时间).
+We also use simple fuzz test to generate random SQL queries and to check that the server does not die executing them.
+You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).

-我们还使用复杂的基于 AST 的查询模糊器, 它能够找到大量的极端情况. 它在查询 AST 中进行随机排列和替换. 它会记住先前测试中的 AST 节点, 以使用它们对后续测试进行模糊测试, 同时以随机顺序处理它们. 您可以在 [这篇博客文章](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/) 中了解有关此模糊器的更多信息.
+We also use sophisticated AST-based query fuzzer that is able to find huge amount of corner cases. It does random permutations and substitutions in queries AST. It remembers AST nodes from previous tests to use them for fuzzing of subsequent tests while processing them in random order. You can learn more about this fuzzer in [this blog article](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/).

-## 压力测试 {#stress-test}
+## Stress test

-压力测试是另一种模糊测试. 它使用单个服务器以随机顺序并行运行所有功能测试. 不检查测试结果.
+Stress tests are another case of fuzzing. A stress test runs all functional tests in parallel in random order with a single server. Results of the tests are not checked.

-经检查:
- 服务器不会崩溃，不会触发调试或清理程序陷阱;
- 没有死锁;
- 数据库结构一致;
- 服务器可以在测试后成功停止并重新启动，没有异常;
+It is checked that:
+- server does not crash, no debug or sanitizer traps are triggered;
+- there are no deadlocks;
+- the database structure is consistent;
+- server can successfully stop after the test and start again without exceptions.

-有五种变体 (Debug, ASan, TSan, MSan, UBSan).
+There are five variants (Debug, ASan, TSan, MSan, UBSan).

-## 线程模糊器 {#thread-fuzzer}
+## Thread Fuzzer

-Thread Fuzzer(请不要与 Thread Sanitizer 混淆)是另一种允许随机化线程执行顺序的模糊测试. 它有助于找到更多特殊情况.
+Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuzzing that allows to randomize thread order of execution. It helps to find even more special cases.

-## 安全审计 {#security-audit}
+## Security Audit

-Yandex安全团队的人员从安全的角度对ClickHouse的功能做了一些基本的概述.
+Our Security Team did some basic overview of ClickHouse capabilities from the security standpoint.

-## 静态分析仪 {#static-analyzers}
+## Static Analyzers {#static-analyzers}

-我们在每次提交的基础上运行 `clang-tidy`. `clang-static-analyzer` 检查也被启用. `clang-tidy` 也用于一些样式检查.
+We run `clang-tidy` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks.

-我们已经评估了 `clang-tidy`、`Coverity`、`cppcheck`、`PVS-Studio`、`tscancode`、`CodeQL`. 您将在 `tests/instructions/` 目录中找到使用说明. 你也可以阅读[俄文文章](https://habr.com/company/yandex/blog/342018/).
+We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory.

-如果你使用 `CLion` 作为 IDE, 你可以利用一些开箱即用的 `clang-tidy` 检查
+If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.

-我们还使用 `shellcheck` 对shell脚本进行静态分析.
+We also use `shellcheck` for static analysis of shell scripts.

-## 硬化 {#hardening}
+## Hardening {#hardening}

-在调试版本中, 我们使用自定义分配器执行用户级分配的 ASLR.
+In debug build we are using custom allocator that does ASLR of user-level allocations.

-我们还手动保护在分配后预期为只读的内存区域.
+We also manually protect memory regions that are expected to be readonly after allocation.

-在调试构建中, 我们还需要对libc进行自定义, 以确保不会调用 "有害的" (过时的、不安全的、非线程安全的)函数.
+In debug build we also involve a customization of libc that ensures that no "harmful" (obsolete, insecure, not thread-safe) functions are called.

-Debug 断言被广泛使用.
+Debug assertions are used extensively.

-在调试版本中，如果抛出带有 "逻辑错误" 代码(暗示错误)的异常, 则程序会过早终止. 它允许在发布版本中使用异常, 但在调试版本中使其成为断言.
+In debug build, if an exception with "logical error" code (implies a bug) is thrown, the program is terminated prematurely. This allows using exceptions in release builds while turning them into assertions in debug builds.

-jemalloc 的调试版本用于调试版本.
-libc++ 的调试版本用于调试版本.
+Debug version of jemalloc is used for debug builds.
+Debug version of libc++ is used for debug builds.

-## 运行时完整性检查
+## Runtime Integrity Checks

-对存储在磁盘上的数据是校验和. MergeTree 表中的数据同时以三种方式进行校验和*(压缩数据块、未压缩数据块、跨块的总校验和). 客户端和服务器之间或服务器之间通过网络传输的数据也会进行校验和. 复制确保副本上的数据位相同.
+Data stored on disk is checksummed. Data in MergeTree tables is checksummed in three ways simultaneously* (compressed data blocks, uncompressed data blocks, the total checksum across blocks). Data transferred over network between client and server or between servers is also checksummed. Replication ensures bit-identical data on replicas.

-需要防止硬件故障(存储介质上的位腐烂、服务器上 RAM 中的位翻转、网络控制器 RAM 中的位翻转、网络交换机 RAM 中的位翻转、客户端 RAM 中的位翻转、线路上的位翻转). 请注意，比特位操作很常见, 即使对于 ECC RAM 和 TCP 校验和(如果您每天设法运行数千台处理 PB 数据的服务器, 也可能发生比特位操作. [观看视频(俄语)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
+It is required to protect from faulty hardware (bit rot on storage media, bit flips in RAM on server, bit flips in RAM of network controller, bit flips in RAM of network switch, bit flips in RAM of client, bit flips on the wire). Note that bit flips are common and likely to occur even for ECC RAM and in presence of TCP checksums (if you manage to run thousands of servers processing petabytes of data each day). [See the video (Russian)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).

-ClickHouse 提供诊断功能, 可帮助运维工程师找到故障硬件.
+ClickHouse provides diagnostics that will help ops engineers to find faulty hardware.

-\* 它并不慢.
+\* and it is not slow.

-## 代码风格 {#code-style}
+## Code Style {#code-style}

-[此处](style.md)描述了代码样式规则.
+Code style rules are described [here](style.md).

-要检查一些常见的样式违规，您可以使用 `utils/check-style` 脚本.
+To check for some common style violations, you can use `utils/check-style` script.

-要强制使用正确的代码样式, 您可以使用 `clang-format`. 文件 `.clang-format` 位于源根目录. 它大多与我们的实际代码风格相对应. 但是不建议将 `clang-format` 应用于现有文件, 因为它会使格式变得更糟. 您可以使用可以在 clang 源代码库中找到的 `clang-format-diff` 工具.
+To force proper style of your code, you can use `clang-format`. File `.clang-format` is located at the sources root. It mostly corresponds to our actual code style. But it’s not recommended to apply `clang-format` to existing files because it makes formatting worse. You can use `clang-format-diff` tool that you can find in clang source repository.

-或者, 您可以尝试使用 `uncrustify` 工具来重新格式化您的代码. 配置位于源根目录中的 `uncrustify.cfg` 中. 它比 `clang-format` 测试更少.
+Alternatively you can try `uncrustify` tool to reformat your code. Configuration is in `uncrustify.cfg` in the sources root. It is less tested than `clang-format`.

-`CLion` 有自己的代码格式化程序, 必须根据我们的代码风格进行调整.
+`CLion` has its own code formatter that has to be tuned for our code style.

-我们还使用 `codespell` 来查找代码中的拼写错误.它也是自动化的.
+We also use `codespell` to find typos in code. It is automated as well.

-## Metrica B2B 测试 {#metrica-b2b-tests}
+## Test Coverage {#test-coverage}

-每个 ClickHouse 版本都使用 Yandex Metrica 和 AppMetrica 引擎进行测试. ClickHouse 的测试版和稳定版部署在 VM 上, 并使用 Metrica 引擎的小副本运行, 该引擎处理输入数据的固定样本. 然后将两个 Metrica 引擎实例的结果放在一起比较.
-
-这些测试由单独的团队自动化. 由于移动部件数量众多, 测试在大多数情况下都因完全不相关的原因而失败, 这些原因很难弄清楚. 这些测试很可能对我们有负面价值. 尽管如此, 这些测试在数百次中被证明是有用的.
-
-## 测试覆盖率 {#test-coverage}
-
-我们还跟踪测试覆盖率, 但仅针对功能测试和 clickhouse-server. 它每天进行.
+We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on daily basis.

 ## Tests for Tests

-有自动检测薄片测试. 它运行所有新测试100次(用于功能测试)或10次(用于集成测试). 如果至少有一次测试失败，它就被认为是脆弱的.
+There is an automated check for flaky tests. It runs all new tests 100 times (for functional tests) or 10 times (for integration tests). If a test fails at least once, it is considered flaky.

 ## Testflows

-[Testflows](https://testflows.com/) 是一个企业级的测试框架. Altinity 使用它进行一些测试, 我们在 CI 中运行这些测试.
+[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse.

-## Yandex 检查 (only for Yandex employees)
+## Test Automation {#test-automation}

-这些检查将ClickHouse代码导入到Yandex内部的单一存储库中, 所以ClickHouse代码库可以被Yandex的其他产品(YT和YDB)用作库. 请注意, clickhouse-server本身并不是由内部回购构建的, Yandex应用程序使用的是未经修改的开源构建的.
+We run tests with [GitHub Actions](https://github.com/features/actions).

-## 测试自动化 {#test-automation}
+Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.

-我们使用 Yandex 内部 CI 和名为 "Sandbox" 的作业自动化系统运行测试.
+We do not use Travis CI due to the limit on time and computational power.
+We do not use Jenkins. It was used before and now we are happy we are not using Jenkins.

-在每次提交的基础上, 构建作业和测试都在沙箱中运行. 生成的包和测试结果发布在GitHub上, 可以通过直接链接下载. 产物要保存几个月. 当你在GitHub上发送一个pull请求时, 我们会把它标记为 "可以测试" , 我们的CI系统会为你构建ClickHouse包(发布、调试、使用地址清理器等).
-
-由于时间和计算能力的限制, 我们不使用 Travis CI.
-我们不用Jenkins. 以前用过, 现在我们很高兴不用Jenkins了.
-
-[原始文章](https://clickhouse.com/docs/en/development/tests/) <!--hide-->
+[Original article](https://clickhouse.com/docs/en/development/tests/) <!--hide-->
--- a/docs/zh/sql-reference/data-types/lowcardinality.md
+++ b/docs/zh/sql-reference/data-types/lowcardinality.md
@ -55,6 +55,5 @@ ORDER BY id

 ## 参考

- [高效低基数类型](https://www.altinity.com/blog/2019/3/27/low-cardinality).
 - [使用低基数类型减少ClickHouse的存储成本 – 来自Instana工程师的分享](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
 - [字符优化 (俄语视频分享)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [英语分享](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).
--- a/docs/zh/sql-reference/functions/date-time-functions.md
+++ b/docs/zh/sql-reference/functions/date-time-functions.md
@ -956,7 +956,7 @@ SELECT
 **语法**

 ``` sql
-formatDateTime(Time, Format\[, Timezone\])
+formatDateTime(Time, Format[, Timezone])
 ```

 **返回值**
--- a/docs/zh/sql-reference/statements/create.md
+++ b/docs/zh/sql-reference/statements/create.md
@ -121,8 +121,6 @@ ENGINE = <Engine>
 ...
 ```

-如果指定了编解ec，则默认编解码器不适用。 编解码器可以组合在一个流水线中，例如, `CODEC(Delta, ZSTD)`. 要为您的项目选择最佳的编解码器组合，请通过类似于Altinity中描述的基准测试 [新编码提高ClickHouse效率](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) 文章.
-
 !!! warning "警告"
    您无法使用外部实用程序解压缩ClickHouse数据库文件，如 `lz4`. 相反，使用特殊的 [ﾂ环板compressorｮﾂ嘉ｯﾂ偲](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) 实用程序。

--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -723,7 +723,7 @@ bool Client::processWithFuzzing(const String & full_query)
        // queries, for lack of a better solution.
        // There is also a problem that fuzzer substitutes positive Int64
        // literals or Decimal literals, which are then parsed back as
-        // UInt64, and suddenly duplicate alias substitition starts or stops
+        // UInt64, and suddenly duplicate alias substitution starts or stops
        // working (ASTWithAlias::formatImpl) or something like that.
        // So we compare not even the first and second formatting of the
        // query, but second and third.
--- a/programs/disks/CommandMkDir.cpp
+++ b/programs/disks/CommandMkDir.cpp
@ -0,0 +1,67 @@
+#pragma once
+
+#include "ICommand.h"
+#include <Interpreters/Context.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+/// Implementation of the `mkdir` command: creates a directory on the disk selected in the configuration.
+class CommandMkDir : public ICommand
+{
+public:
+    /// Fills in the command name, description, usage string and registers the `recursive` option.
+    CommandMkDir()
+    {
+        command_name = "mkdir";
+        command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth()));
+        description = "Create directory or directories recursively";
+        usage = "mkdir [OPTION]... <PATH>";
+        command_option_description->add_options()
+            ("recursive", "recursively create directories")
+            ;
+    }
+
+    /// Copies the parsed `recursive` option into `config` so that execute() can read it later.
+    void processOptions(
+        Poco::Util::LayeredConfiguration & config,
+        po::variables_map & options) const override
+    {
+        if (options.count("recursive"))
+            config.setBool("recursive", true);
+    }
+
+    /// Creates the directory given as the single positional argument.
+    /// Prints the help message and throws BAD_ARGUMENTS unless exactly one argument is passed.
+    void execute(
+        const std::vector<String> & command_arguments,
+        DB::ContextMutablePtr & global_context,
+        Poco::Util::LayeredConfiguration & config) override
+    {
+        if (command_arguments.size() != 1)
+        {
+            printHelpMessage();
+            throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS);
+        }
+
+        /// The disk name is taken from the "disk" config key; "default" is used when the key is absent.
+        String disk_name = config.getString("disk", "default");
+
+        String path = command_arguments[0];
+
+        DiskPtr disk = global_context->getDisk(disk_name);
+
+        String full_path = fullPathWithValidate(disk, path);
+        bool recursive = config.getBool("recursive", false);
+
+        /// With `recursive` set createDirectories() is used, otherwise createDirectory().
+        if (recursive)
+            disk->createDirectories(full_path);
+        else
+            disk->createDirectory(full_path);
+    }
+};
+}
+
+/// Factory function: returns a newly created CommandMkDir as a pointer to the ICommand interface.
+std::unique_ptr <DB::ICommand> makeCommandMkDir()
+{
+    return std::make_unique<DB::CommandMkDir>();
+}
--- a/programs/disks/DisksApp.cpp
+++ b/programs/disks/DisksApp.cpp
@ -63,7 +63,7 @@ void DisksApp::addOptions(

    positional_options_description.add("command_name", 1);

-    supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read"};
+    supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read", "mkdir"};

    command_descriptions.emplace("list-disks", makeCommandListDisks());
    command_descriptions.emplace("list", makeCommandList());
@ -73,6 +73,7 @@ void DisksApp::addOptions(
    command_descriptions.emplace("copy", makeCommandCopy());
    command_descriptions.emplace("write", makeCommandWrite());
    command_descriptions.emplace("read", makeCommandRead());
+    command_descriptions.emplace("mkdir", makeCommandMkDir());
 }

 void DisksApp::processOptions()
--- a/programs/disks/DisksApp.h
+++ b/programs/disks/DisksApp.h
@ -4,6 +4,7 @@
 #include "CommandLink.cpp"
 #include "CommandList.cpp"
 #include "CommandListDisks.cpp"
+#include "CommandMkDir.cpp"
 #include "CommandMove.cpp"
 #include "CommandRead.cpp"
 #include "CommandRemove.cpp"
--- a/programs/disks/ICommand.h
+++ b/programs/disks/ICommand.h
@ -65,3 +65,4 @@ std::unique_ptr <DB::ICommand> makeCommandMove();
 std::unique_ptr <DB::ICommand> makeCommandRead();
 std::unique_ptr <DB::ICommand> makeCommandRemove();
 std::unique_ptr <DB::ICommand> makeCommandWrite();
+std::unique_ptr <DB::ICommand> makeCommandMkDir();
--- a/programs/git-import/git-import.cpp
+++ b/programs/git-import/git-import.cpp
@ -67,7 +67,7 @@ Run this tool inside your git repository. It will create .tsv files that can be
 The tool can process large enough repositories in a reasonable time.
 It has been tested on:
 - ClickHouse: 31 seconds; 3 million rows;
- LLVM: 8 minues; 62 million rows;
+- LLVM: 8 minutes; 62 million rows;
 - Linux - 12 minutes; 85 million rows;
 - Chromium - 67 minutes; 343 million rows;
 (the numbers as of Sep 2020)
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -736,7 +736,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
    std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
    /// This object will periodically calculate some metrics.
    AsynchronousMetrics async_metrics(
-        global_context, config().getUInt("asynchronous_metrics_update_period_s", 1),
+        global_context,
+        config().getUInt("asynchronous_metrics_update_period_s", 1),
+        config().getUInt("asynchronous_heavy_metrics_update_period_s", 120),
        [&]() -> std::vector<ProtocolServerMetrics>
        {
            std::vector<ProtocolServerMetrics> metrics;
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -65,9 +65,31 @@
        in specified format like JSON.
        For example, as below:
        {"date_time":"1650918987.180175","thread_name":"#1","thread_id":"254545","level":"Trace","query_id":"","logger_name":"BaseDaemon","message":"Received signal 2","source_file":"../base/daemon/BaseDaemon.cpp; virtual void SignalListener::run()","source_line":"192"}
-        To enable JSON logging support, just uncomment <formatting> tag below.
+        To enable JSON logging support, please uncomment the entire <formatting> tag below.
+        
+        a) You can modify key names by changing the values of the tags inside the <names> tag.
+        For example, to change DATE_TIME to MY_DATE_TIME, you can write:
+            <date_time>MY_DATE_TIME</date_time>
+        b) You can prevent unwanted log properties from appearing in logs. To do so, simply comment out (recommended)
+        that property in this file.
+        For example, if you do not want your log to print query_id, you can comment out only the <query_id> tag.
+        However, if you comment out all the tags under <names>, the program will print the default values
+        shown below.
        -->
-        <!-- <formatting>json</formatting> -->
+        <!-- <formatting>
+            <type>json</type>
+            <names>
+                <date_time>date_time</date_time>
+                <thread_name>thread_name</thread_name>
+                <thread_id>thread_id</thread_id>
+                <level>level</level>
+                <query_id>query_id</query_id>
+                <logger_name>logger_name</logger_name>
+                <message>message</message>
+                <source_file>source_file</source_file>
+                <source_line>source_line</source_line>
+            </names>
+        </formatting> -->
    </logger>

    <!-- Add headers to response in options request. OPTIONS method is used in CORS preflight requests. -->
--- a/src/Access/AccessControl.cpp
+++ b/src/Access/AccessControl.cpp
@ -79,7 +79,7 @@ public:
            /// No user, probably the user has been dropped while it was in the cache.
            cache.remove(params);
        }
-        auto res = ContextAccess::make(access_control, params);
+        auto res = std::make_shared<ContextAccess>(access_control, params);
        res->initialize();
        cache.add(params, res);
        return res;
--- a/src/Access/ContextAccess.cpp
+++ b/src/Access/ContextAccess.cpp
@ -410,7 +410,7 @@ std::shared_ptr<const ContextAccess> ContextAccess::getFullAccess()
 {
    static const std::shared_ptr<const ContextAccess> res = []
    {
-        auto full_access = ContextAccess::make();
+        auto full_access = std::make_shared<ContextAccess>();
        full_access->is_full_access = true;
        full_access->access = std::make_shared<AccessRights>(AccessRights::getFullAccess());
        full_access->access_with_implicit = full_access->access;
--- a/src/Access/ContextAccess.h
+++ b/src/Access/ContextAccess.h
@ -166,12 +166,6 @@ public:
    /// without any limitations. This is used for the global context.
    static std::shared_ptr<const ContextAccess> getFullAccess();

-    template <typename... Args>
-    static std::shared_ptr<ContextAccess> make(Args &&... args)
-    {
-        return std::make_shared<ContextAccess>(std::forward<Args>(args)...);
-    }
-
    ~ContextAccess();

 private:
--- a/src/AggregateFunctions/ThetaSketchData.h
+++ b/src/AggregateFunctions/ThetaSketchData.h
@ -9,6 +9,8 @@
 #include <base/StringRef.h>
 #include <theta_sketch.hpp>
 #include <theta_union.hpp>
+#include <theta_intersection.hpp>
+#include <theta_a_not_b.hpp>


 namespace DB
@ -80,6 +82,58 @@ public:
            u->update(rhs.sk_union->get_result());
    }

+    /// Replaces the contents of this sketch with the intersection of this sketch and `rhs`.
+    void intersect(const ThetaSketchData & rhs)
+    {
+        /// `rhs` holds neither an update sketch nor a union, i.e. it contains no data.
+        /// The intersection with an empty set is empty, so just drop everything stored here.
+        /// Without this check only the left-hand side would be fed into the intersection
+        /// and the result would stay equal to the left-hand side.
+        if (!rhs.sk_update && !rhs.sk_union)
+        {
+            sk_update.reset(nullptr);
+            sk_union.reset(nullptr);
+            return;
+        }
+
+        datasketches::theta_union * u = getSkUnion();
+
+        /// Move the data accumulated in the update sketch into the union,
+        /// so that the union result covers everything stored in this object.
+        if (sk_update)
+        {
+            u->update(*sk_update);
+            sk_update.reset(nullptr);
+        }
+
+        datasketches::theta_intersection theta_intersection;
+
+        theta_intersection.update(u->get_result());
+
+        /// At least one of the two sketches of `rhs` is present here (checked above).
+        if (rhs.sk_update)
+            theta_intersection.update(*rhs.sk_update);
+        else
+            theta_intersection.update(rhs.sk_union->get_result());
+
+        /// Drop the old union and keep only the intersection result
+        /// (getSkUnion() is expected to provide a new union after the reset).
+        sk_union.reset(nullptr);
+        u = getSkUnion();
+        u->update(theta_intersection.get_result());
+    }
+
+    /// Replaces the contents of this sketch with the result of the "A not B" operation,
+    /// where A is this sketch and B is `rhs`.
+    /// If `rhs` holds neither an update sketch nor a union, the stored data is left as is
+    /// (apart from moving the update sketch into the union).
+    void aNotB(const ThetaSketchData & rhs)
+    {
+        datasketches::theta_union * u = getSkUnion();
+
+        /// Move the data accumulated in the update sketch into the union first.
+        if (sk_update)
+        {
+            u->update(*sk_update);
+            sk_update.reset(nullptr);
+        }
+
+        datasketches::theta_a_not_b a_not_b;
+
+        /// Both branches do the same thing; they differ only in which sketch of `rhs` is used as B.
+        if (rhs.sk_update)
+        {
+            datasketches::compact_theta_sketch result = a_not_b.compute(u->get_result(), *rhs.sk_update);
+            /// Drop the old union and keep only the computed result.
+            sk_union.reset(nullptr);
+            u = getSkUnion();
+            u->update(result);
+        }
+        else if (rhs.sk_union)
+        {
+            datasketches::compact_theta_sketch result = a_not_b.compute(u->get_result(), rhs.sk_union->get_result());
+            sk_union.reset(nullptr);
+            u = getSkUnion();
+            u->update(result);
+        }
+    }
+
    /// You can only call for an empty object.
    void read(DB::ReadBuffer & in)
    {
--- a/src/Backups/BackupImpl.cpp
+++ b/src/Backups/BackupImpl.cpp
@ -537,7 +537,7 @@ SizeAndChecksum BackupImpl::getFileSizeAndChecksum(const String & file_name) con
    if (!info)
        throw Exception(
            ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
-    return std::pair(info->size, info->checksum);
+    return {info->size, info->checksum};
 }

 BackupEntryPtr BackupImpl::readFile(const String & file_name) const
@ -625,7 +625,7 @@ CheckBackupResult checkBaseBackupForFile(const SizeAndChecksum & base_backup_inf
 {
    /// We cannot reuse base backup because our file is smaller
    /// than file stored in previous backup
-    if (new_entry_info.size > base_backup_info.first)
+    if (new_entry_info.size < base_backup_info.first)
        return CheckBackupResult::HasNothing;

    if (base_backup_info.first == new_entry_info.size)
@ -682,8 +682,6 @@ ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(BackupEntryPtr entry, si

 void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
 {
-
-    std::lock_guard lock{mutex};
    if (open_mode != OpenMode::WRITE)
        throw Exception("Backup is not opened for writing", ErrorCodes::LOGICAL_ERROR);

@ -802,7 +800,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
    /// or have only prefix of it in previous backup. Let's go long path.

    info.data_file_name = info.file_name;
+
+    if (use_archives)
+    {
+        std::lock_guard lock{mutex};
        info.archive_suffix = current_archive_suffix;
+    }

    bool is_data_file_required;
    coordination->addFileInfo(info, is_data_file_required);
@ -818,9 +821,11 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
    /// if source and destination are compatible
    if (!use_archives && info.base_size == 0 && writer->supportNativeCopy(reader_description))
    {
-
+        /// Should be much faster than writing data through server.
        LOG_TRACE(log, "Will copy file {} using native copy", adjusted_path);
-        /// Should be much faster than writing data through server
+
+        /// NOTE: `mutex` must be unlocked here otherwise writing will be in one thread maximum and hence slow.
+
        writer->copyFileNative(entry->tryGetDiskIfExists(), entry->getFilePath(), info.data_file_name);
    }
    else
@ -838,6 +843,11 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
        if (use_archives)
        {
            LOG_TRACE(log, "Adding file {} to archive", adjusted_path);
+
+            /// An archive must be written strictly in one thread, so it's correct to lock the mutex for all the time we're writing the file
+            /// to the archive.
+            std::lock_guard lock{mutex};
+
            String archive_suffix = current_archive_suffix;
            bool next_suffix = false;
            if (current_archive_suffix.empty() && is_internal_backup)
@ -859,6 +869,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
        }
        else
        {
+            /// NOTE: `mutex` must be unlocked here otherwise writing will be in one thread maximum and hence slow.
            writer->copyFileThroughBuffer(std::move(read_buffer), info.data_file_name);
        }
    }
--- a/src/Backups/BackupImpl.h
+++ b/src/Backups/BackupImpl.h
@ -130,7 +130,7 @@ private:
    std::pair<String, std::shared_ptr<IArchiveWriter>> archive_writers[2];
    String current_archive_suffix;
    String lock_file_name;
-    size_t num_files_written = 0;
+    std::atomic<size_t> num_files_written = 0;
    bool writing_finalized = false;
    const Poco::Logger * log;
 };
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -247,6 +247,7 @@ add_object_library(clickhouse_databases Databases)
 add_object_library(clickhouse_databases_mysql Databases/MySQL)
 add_object_library(clickhouse_disks Disks)
 add_object_library(clickhouse_interpreters Interpreters)
+add_object_library(clickhouse_interpreters_cache Interpreters/Cache)
 add_object_library(clickhouse_interpreters_access Interpreters/Access)
 add_object_library(clickhouse_interpreters_mysql Interpreters/MySQL)
 add_object_library(clickhouse_interpreters_clusterproxy Interpreters/ClusterProxy)
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -91,13 +91,6 @@ static const NameSet exit_strings
    "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
 };

-static const std::initializer_list<std::pair<String, String>> backslash_aliases
-{
-    { "\\l", "SHOW DATABASES" },
-    { "\\d", "SHOW TABLES" },
-    { "\\c", "USE" },
-};
-

 namespace ErrorCodes
 {
@ -1999,6 +1992,21 @@ void ClientBase::runInteractive()
    /// Enable bracketed-paste-mode so that we are able to paste multiline queries as a whole.
    lr.enableBracketedPaste();

+    static const std::initializer_list<std::pair<String, String>> backslash_aliases =
+        {
+            { "\\l", "SHOW DATABASES" },
+            { "\\d", "SHOW TABLES" },
+            { "\\c", "USE" },
+        };
+
+    static const std::initializer_list<String> repeat_last_input_aliases =
+        {
+            ".",  /// Vim shortcut
+            "/"   /// Oracle SQL Plus shortcut
+        };
+
+    String last_input;
+
    do
    {
        auto input = lr.readLine(prompt(), ":-] ");
@ -2016,7 +2024,7 @@ void ClientBase::runInteractive()
            has_vertical_output_suffix = true;
        }

-        for (const auto& [alias, command] : backslash_aliases)
+        for (const auto & [alias, command] : backslash_aliases)
        {
            auto it = std::search(input.begin(), input.end(), alias.begin(), alias.end());
            if (it != input.end() && std::all_of(input.begin(), it, isWhitespaceASCII))
@ -2034,10 +2042,20 @@ void ClientBase::runInteractive()
            }
        }

+        for (const auto & alias : repeat_last_input_aliases)
+        {
+            if (input == alias)
+            {
+                input  = last_input;
+                break;
+            }
+        }
+
        try
        {
            if (!processQueryText(input))
                break;
+            last_input = input;
        }
        catch (const Exception & e)
        {
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@ -24,6 +24,7 @@
 #include <Common/randomSeed.h>
 #include "Core/Block.h"
 #include <Interpreters/ClientInfo.h>
+#include <Interpreters/OpenTelemetrySpanLog.h>
 #include <Compression/CompressionFactory.h>
 #include <QueryPipeline/Pipe.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
@ -483,6 +484,22 @@ void Connection::sendQuery(
    bool with_pending_data,
    std::function<void(const Progress &)>)
 {
+    OpenTelemetry::SpanHolder span("Connection::sendQuery()");
+    span.addAttribute("clickhouse.query_id", query_id_);
+    span.addAttribute("clickhouse.query", query);
+    span.addAttribute("target", [this] () { return this->getHost() + ":" + std::to_string(this->getPort()); });
+
+    ClientInfo new_client_info;
+    const auto &current_trace_context = OpenTelemetry::CurrentContext();
+    if (client_info && current_trace_context.isTraceEnabled())
+    {
+        // use current span as the parent of remote span
+        new_client_info = *client_info;
+        new_client_info.client_trace_context = current_trace_context;
+
+        client_info = &new_client_info;
+    }
+
    if (!connected)
        connect(timeouts);

@ -540,7 +557,7 @@ void Connection::sendQuery(
        /// Send correct hash only for !INITIAL_QUERY, due to:
        /// - this will avoid extra protocol complexity for simplest cases
        /// - there is no need in hash for the INITIAL_QUERY anyway
-        ///   (since there is no secure/unsecure changes)
+        ///   (since there is no secure/non-secure changes)
        if (client_info && !cluster_secret.empty() && client_info->query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
        {
 #if USE_SSL
--- a/src/Client/HedgedConnectionsFactory.cpp
+++ b/src/Client/HedgedConnectionsFactory.cpp
@ -41,7 +41,7 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
 HedgedConnectionsFactory::~HedgedConnectionsFactory()
 {
    /// Stop anything that maybe in progress,
-    /// to avoid interfer with the subsequent connections.
+    /// to avoid interference with the subsequent connections.
    ///
    /// I.e. some replcas may be in the establishing state,
    /// this means that hedged connection is waiting for TablesStatusResponse,
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@ -12,6 +12,7 @@
 #include <Interpreters/castColumn.h>
 #include <Interpreters/convertFieldToType.h>
 #include <Common/HashTable/HashSet.h>
+#include <Processors/Transforms/ColumnGathererTransform.h>

 namespace DB
 {
@ -154,13 +155,15 @@ FieldInfo getFieldInfo(const Field & field)
 {
    FieldVisitorToScalarType to_scalar_type_visitor;
    applyVisitor(to_scalar_type_visitor, field);
+    FieldVisitorToNumberOfDimensions to_number_dimension_visitor;

    return
    {
        to_scalar_type_visitor.getScalarType(),
        to_scalar_type_visitor.haveNulls(),
        to_scalar_type_visitor.needConvertField(),
-        applyVisitor(FieldVisitorToNumberOfDimensions(), field),
+        applyVisitor(to_number_dimension_visitor, field),
+        to_number_dimension_visitor.need_fold_dimension
    };
 }

@ -821,6 +824,44 @@ MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const
    return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.cloneResized(new_size); });
 }

+/// Returns the identity permutation (0, 1, ..., num_rows - 1).
+/// The sort direction, stability, limit and NaN hint arguments are ignored.
+void ColumnObject::getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const
+{
+    res.resize(num_rows);
+    std::iota(res.begin(), res.end(), 0);
+}
+
+/// Forwards to the generic doCompareColumn() helper; `rhs` is cast to ColumnObject with assert_cast.
+void ColumnObject::compareColumn(const IColumn & rhs, size_t rhs_row_num,
+                                 PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
+                                 int direction, int nan_direction_hint) const
+{
+    return doCompareColumn<ColumnObject>(assert_cast<const ColumnObject &>(rhs), rhs_row_num, row_indexes,
+                                        compare_results, direction, nan_direction_hint);
+}
+
+/// Sets both `min` and `max` to an empty Object for an empty column,
+/// and to the value of the first row otherwise (no rows are actually compared).
+void ColumnObject::getExtremes(Field & min, Field & max) const
+{
+    if (num_rows == 0)
+    {
+        min = Object();
+        max = Object();
+    }
+    else
+    {
+        get(0, min);
+        get(0, max);
+    }
+}
+
+/// Forwards to the generic scatterImpl() helper.
+MutableColumns ColumnObject::scatter(ColumnIndex num_columns, const Selector & selector) const
+{
+    return scatterImpl<ColumnObject>(num_columns, selector);
+}
+
+/// Forwards to ColumnGathererStream::gather(), passing this column as the destination.
+void ColumnObject::gather(ColumnGathererStream & gatherer)
+{
+    gatherer.gather(*this);
+}
+
 const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
 {
    if (const auto * node = subcolumns.findLeaf(key))
--- a/src/Columns/ColumnObject.h
+++ b/src/Columns/ColumnObject.h
@ -15,7 +15,7 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
+    extern const int NOT_IMPLEMENTED;
 }

 /// Info that represents a scalar or array field in a decomposed view.
@ -35,6 +35,10 @@ struct FieldInfo

    /// Number of dimension in array. 0 if field is scalar.
    size_t num_dimensions;
+
+    /// If true then this field is an array of variadic dimension field
+    /// and we need to normalize the dimension
+    bool need_fold_dimension;
 };

 FieldInfo getFieldInfo(const Field & field);
@ -220,6 +224,19 @@ public:
    ColumnPtr replicate(const Offsets & offsets) const override;
    MutableColumnPtr cloneResized(size_t new_size) const override;

+    /// Order of rows in ColumnObject is undefined.
+    void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
+    void compareColumn(const IColumn & rhs, size_t rhs_row_num,
+                       PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
+                       int direction, int nan_direction_hint) const override;
+
+    void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
+    int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
+    void getExtremes(Field & min, Field & max) const override;
+
+    MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
+    void gather(ColumnGathererStream & gatherer) override;
+
    /// All other methods throw exception.

    StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
@ -232,14 +249,7 @@ public:
    void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
    void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
    void expand(const Filter &, bool) override { throwMustBeConcrete(); }
-    int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeConcrete(); }
-    void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override { throwMustBeConcrete(); }
    bool hasEqualValues() const override { throwMustBeConcrete(); }
-    void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &) const override { throwMustBeConcrete(); }
-    void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override { throwMustBeConcrete(); }
-    MutableColumns scatter(ColumnIndex, const Selector &) const override { throwMustBeConcrete(); }
-    void gather(ColumnGathererStream &) override { throwMustBeConcrete(); }
-    void getExtremes(Field &, Field &) const override { throwMustBeConcrete(); }
    size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
    double getRatioOfDefaultRows(double) const override { throwMustBeConcrete(); }
    void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeConcrete(); }
@ -247,7 +257,7 @@ public:
 private:
    [[noreturn]] static void throwMustBeConcrete()
    {
-        throw Exception("ColumnObject must be converted to ColumnTuple before use", ErrorCodes::LOGICAL_ERROR);
+        throw Exception("ColumnObject must be converted to ColumnTuple before use", ErrorCodes::NOT_IMPLEMENTED);
    }

    template <typename Func>
--- a/src/Common/CaresPTRResolver.cpp
+++ b/src/Common/CaresPTRResolver.cpp
@ -15,8 +15,8 @@ namespace DB

    static void callback(void * arg, int status, int, struct hostent * host)
    {
-        auto * ptr_records = reinterpret_cast<std::unordered_set<std::string>*>(arg);
-        if (status == ARES_SUCCESS && host->h_aliases)
+        auto * ptr_records = static_cast<std::unordered_set<std::string>*>(arg);
+        if (ptr_records && status == ARES_SUCCESS)
        {
            /*
             * In some cases (e.g /etc/hosts), hostent::h_name is filled and hostent::h_aliases is empty.
@ -28,6 +28,8 @@ namespace DB
                ptr_records->insert(ptr_record);
            }

+            if (host->h_aliases)
+            {
                int i = 0;
                while (auto * ptr_record = host->h_aliases[i])
                {
@ -36,6 +38,7 @@ namespace DB
                }
            }
        }
+    }

    CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr)
    {
--- a/src/Common/CurrentMemoryTracker.cpp
+++ b/src/Common/CurrentMemoryTracker.cpp
@ -52,15 +52,10 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
        if (current_thread)
        {
            Int64 will_be = current_thread->untracked_memory + size;
-            Int64 limit = current_thread->untracked_memory_limit + current_thread->untracked_memory_limit_increase;

-            if (will_be > limit)
+            if (will_be > current_thread->untracked_memory_limit)
            {
-                /// Increase limit before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
-                /// more. It could be useful to enlarge Exception message in rethrow logic.
-                current_thread->untracked_memory_limit_increase = current_thread->untracked_memory_limit;
                memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
-                current_thread->untracked_memory_limit_increase = 0;
                current_thread->untracked_memory = 0;
            }
            else
--- a/src/Common/IntervalKind.h
+++ b/src/Common/IntervalKind.h
@ -64,7 +64,7 @@ struct IntervalKind
    const char * toNameOfFunctionExtractTimePart() const;

    /// Converts the string representation of an interval kind to its IntervalKind equivalent.
-    /// Returns false if the conversion unsucceeded.
+    /// Returns false if the conversion did not succeed.
    /// For example, `IntervalKind::tryParseString('second', result)` returns `result` equals `IntervalKind::Kind::Second`.
    static bool tryParseString(const std::string & kind, IntervalKind::Kind & result);
 };
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@ -166,27 +166,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
        }
    }

-    std::bernoulli_distribution fault(fault_probability);
-    if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true) && throw_if_memory_exceeded)
-    {
-        /// Revert
-        amount.fetch_sub(size, std::memory_order_relaxed);
-
-        /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
-        MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
-
-        ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
-        const auto * description = description_ptr.load(std::memory_order_relaxed);
-        throw DB::Exception(
-            DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED,
-            "Memory tracker{}{}: fault injected. Would use {} (attempt to allocate chunk of {} bytes), maximum: {}",
-            description ? " " : "",
-            description ? description : "",
-            formatReadableSizeWithBinarySuffix(will_be),
-            size,
-            formatReadableSizeWithBinarySuffix(current_hard_limit));
-    }
-
+    bool memory_limit_exceeded_ignored = false;

    bool allocation_traced = false;
    if (unlikely(current_profiler_limit && will_be > current_profiler_limit))
@ -205,7 +185,36 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
        allocation_traced = true;
    }

-    if (unlikely(current_hard_limit && will_be > current_hard_limit) && memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded)
+    std::bernoulli_distribution fault(fault_probability);
+    if (unlikely(fault_probability && fault(thread_local_rng)))
+    {
+        if (memoryTrackerCanThrow(level, true) && throw_if_memory_exceeded)
+        {
+            /// Revert
+            amount.fetch_sub(size, std::memory_order_relaxed);
+
+            /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
+            MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
+
+            ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
+            const auto * description = description_ptr.load(std::memory_order_relaxed);
+            throw DB::Exception(
+                DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED,
+                "Memory tracker{}{}: fault injected. Would use {} (attempt to allocate chunk of {} bytes), maximum: {}",
+                description ? " " : "",
+                description ? description : "",
+                formatReadableSizeWithBinarySuffix(will_be),
+                size,
+                formatReadableSizeWithBinarySuffix(current_hard_limit));
+        }
+        else
+            memory_limit_exceeded_ignored = true;
+    }
+
+
+    if (unlikely(current_hard_limit && will_be > current_hard_limit))
+    {
+        if (memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded)
        {
            OvercommitResult overcommit_result = OvercommitResult::NONE;
            if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed); overcommit_tracker_ptr != nullptr && query_tracker != nullptr)
@ -238,8 +247,16 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
                will_be = amount.load(std::memory_order_relaxed);
            }
        }
+        else
+            memory_limit_exceeded_ignored = true;
+    }

-    bool peak_updated;
+    bool peak_updated = false;
+    /// In case of MEMORY_LIMIT_EXCEEDED was ignored, will_be may include
+    /// memory of other allocations, that may fail but not reverted yet, and so
+    /// updating peak will be inaccurate.
+    if (!memory_limit_exceeded_ignored)
+    {
        if (throw_if_memory_exceeded)
        {
            /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
@ -252,6 +269,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
            bool log_memory_usage = false;
            peak_updated = updatePeak(will_be, log_memory_usage);
        }
+    }

    if (peak_updated && allocation_traced)
    {
--- a/src/Common/OpenTelemetryTraceContext.cpp
+++ b/src/Common/OpenTelemetryTraceContext.cpp
@ -0,0 +1,334 @@
+#include "Interpreters/OpenTelemetrySpanLog.h"
+
+#include <random>
+#include <base/getThreadId.h>
+#include <Common/Exception.h>
+#include <Common/hex.h>
+#include <Core/Settings.h>
+#include <IO/WriteHelpers.h>
+
+namespace DB
+{
+namespace OpenTelemetry
+{
+
+thread_local TracingContextOnThread current_thread_trace_context;
+
+/// Attach an integer attribute to the span; the value is stored in its textual form.
+/// Nothing is recorded when tracing is not enabled for this span or when the name is empty.
+void Span::addAttribute(std::string_view name, UInt64 value)
+{
+    const bool should_record = this->isTraceEnabled() && !name.empty();
+    if (should_record)
+        this->attributes.push_back(Tuple{name, toString(value)});
+}
+
+/// Same as addAttribute(name, value), except that a zero value is skipped.
+void Span::addAttributeIfNotZero(std::string_view name, UInt64 value)
+{
+    if (value == 0)
+        return;
+
+    addAttribute(name, value);
+}
+
+/// Attach a string attribute to the span.
+/// Nothing is recorded when tracing is not enabled for this span or when the name is empty.
+void Span::addAttribute(std::string_view name, std::string_view value)
+{
+    const bool should_record = this->isTraceEnabled() && !name.empty();
+    if (should_record)
+        this->attributes.push_back(Tuple{name, value});
+}
+
+/// Same as addAttribute(name, value) for strings, except that an empty value is skipped.
+void Span::addAttributeIfNotEmpty(std::string_view name, std::string_view value)
+{
+    if (value.empty())
+        return;
+
+    /// The string overload performs the remaining checks (tracing enabled, non-empty name).
+    addAttribute(name, value);
+}
+
+/// Attach a string attribute whose value is produced lazily.
+/// The supplier is only invoked when the attribute can actually be recorded, i.e. tracing is
+/// enabled for this span, the name is not empty and a supplier is set. An empty produced value
+/// is skipped.
+void Span::addAttribute(std::string_view name, std::function<String()> value_supplier)
+{
+    /// Reject an empty name like the other addAttribute overloads do.
+    if (!this->isTraceEnabled() || name.empty() || !value_supplier)
+        return;
+
+    String value = value_supplier();
+    if (value.empty())
+        return;
+
+    this->attributes.push_back(Tuple{name, value});
+}
+
+/// Record the message of an exception under the "clickhouse.exception" attribute.
+/// Declared noexcept: any failure while building or storing the message is swallowed.
+void Span::addAttribute(const Exception & e) noexcept
+{
+    if (this->isTraceEnabled())
+    {
+        try
+        {
+            auto message = getExceptionMessage(e, false);
+            this->attributes.push_back(Tuple{"clickhouse.exception", message});
+        }
+        catch (...)
+        {
+            /// Ignore exceptions
+        }
+    }
+}
+
+/// Record the message of a captured exception under the "clickhouse.exception" attribute.
+/// A null exception_ptr is ignored. Declared noexcept: any failure while building or storing
+/// the message is swallowed.
+void Span::addAttribute(std::exception_ptr e) noexcept
+{
+    if (!e || !this->isTraceEnabled())
+        return;
+
+    try
+    {
+        auto message = getExceptionMessage(e, false);
+        this->attributes.push_back(Tuple{"clickhouse.exception", message});
+    }
+    catch (...)
+    {
+        /// Ignore exceptions
+    }
+}
+
+/// Open a child span of the span that is currently active on this thread.
+/// When no trace is active on the thread, the holder keeps its default (disabled) state.
+SpanHolder::SpanHolder(std::string_view _operation_name)
+{
+    if (!current_thread_trace_context.isTraceEnabled())
+        return;
+
+    trace_id = current_thread_trace_context.trace_id;
+    parent_span_id = current_thread_trace_context.span_id;
+    span_id = thread_local_rng(); // a fresh id for this span
+    operation_name = _operation_name;
+
+    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
+    start_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
+
+    // From now on this span is the current one on the thread.
+    current_thread_trace_context.span_id = span_id;
+}
+
+/// Close the span: make the parent span the current one on this thread again and, when the
+/// span log is still available, record the finish time and write the span to the log.
+/// Afterwards trace_id is cleared, so isTraceEnabled() becomes false and every later call
+/// (including the one made by the destructor) is a no-op.
+void SpanHolder::finish() noexcept
+{
+    if (!this->isTraceEnabled())
+        return;
+
+    // First of all, restore old value of current span.
+    assert(current_thread_trace_context.span_id == span_id);
+    current_thread_trace_context.span_id = parent_span_id;
+
+    try
+    {
+        auto log = current_thread_trace_context.span_log.lock();
+
+        // The log might be disabled. Do not return early in that case: the span still has to
+        // be marked as finished below, otherwise a repeated finish() would run the restore
+        // logic above a second time.
+        if (log)
+        {
+            this->finish_time_us
+                = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
+
+            log->add(OpenTelemetrySpanLogElement(*this));
+        }
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__FUNCTION__);
+    }
+
+    // Mark the span as finished.
+    trace_id = UUID();
+}
+
+/// Finishes the span on scope exit; finish() returns immediately when tracing is not enabled for this span.
+SpanHolder::~SpanHolder()
+{
+    finish();
+}
+
+/// Parse a `traceparent` header of version 00 ("vv-<32 hex trace id>-<16 hex span id>-<2 hex flags>").
+///
+/// On success fills trace_id, span_id and trace_flags and returns true.
+/// On failure returns false and puts a description into `error`; trace_id has already been
+/// reset to zero at that point, the other fields are left untouched.
+///
+/// NOTE(review): this function itself does not check that the fields consist of hex digits;
+/// whether malformed digits are detected depends on unhex2/unhexUInt - confirm.
+bool TracingContext::parseTraceparentHeader(std::string_view traceparent, String & error)
+{
+    trace_id = 0;
+
+    // Version 00, which is the only one we can parse, is fixed width. Use this
+    // fact for an additional sanity check.
+    static constexpr std::string_view expected_layout = "xx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxx-xx";
+    if (traceparent.length() != expected_layout.length())
+    {
+        error = fmt::format("unexpected length {}, expected {}", traceparent.length(), expected_layout.length());
+        return false;
+    }
+
+    // Shared failure path for a missing '-' delimiter between the fields.
+    auto fail_malformed = [&]
+    {
+        error = fmt::format("Malformed traceparent header: {}", traceparent);
+        return false;
+    };
+
+    const char * data = traceparent.data();
+
+    uint8_t version = unhex2(data);
+    data += 2;
+
+    if (version != 0)
+    {
+        error = fmt::format("unexpected version {}, expected 00", version);
+        return false;
+    }
+
+    if (*data != '-')
+        return fail_malformed();
+
+    ++data;
+    UInt64 trace_id_higher_64 = unhexUInt<UInt64>(data);
+    UInt64 trace_id_lower_64 = unhexUInt<UInt64>(data + 16);
+    data += 32;
+
+    if (*data != '-')
+        return fail_malformed();
+
+    ++data;
+    UInt64 span_id_64 = unhexUInt<UInt64>(data);
+    data += 16;
+
+    if (*data != '-')
+        return fail_malformed();
+
+    ++data;
+    this->trace_flags = unhex2(data);
+    this->trace_id.toUnderType().items[0] = trace_id_higher_64;
+    this->trace_id.toUnderType().items[1] = trace_id_lower_64;
+    this->span_id = span_id_64;
+    return true;
+}
+
+/// Build a version 00 `traceparent` header from this context.
+/// This span is a parent for its children, so this span_id is emitted as the parent id.
+String TracingContext::composeTraceparentHeader() const
+{
+    const auto & trace_id_halves = trace_id.toUnderType().items;
+
+    // This cast is needed because fmt is being weird and complaining that
+    // "mixing character types is not allowed".
+    const auto flags = static_cast<uint8_t>(trace_flags);
+
+    return fmt::format("00-{:016x}{:016x}-{:016x}-{:02x}", trace_id_halves[0], trace_id_halves[1], span_id, flags);
+}
+
+/// Read-only access to the thread-local tracing context of the calling thread.
+const TracingContextOnThread & CurrentContext()
+{
+    return current_thread_trace_context;
+}
+
+/// Bring the context back to its default, disabled state and drop the reference to the span log.
+void TracingContextOnThread::reset()
+{
+    span_log.reset();
+    tracestate.clear();
+    trace_flags = TRACE_FLAG_NONE;
+    span_id = 0;
+    trace_id = UUID();
+}
+
+/// Set up the root span for the current thread.
+///
+/// Three outcomes are possible:
+///  - a trace is already active on this thread: the root span becomes a child of the current span
+///    and this holder does not own the thread context;
+///  - the parent context carries no trace: either nothing is initialized (no settings given, or the
+///    random draw against opentelemetry_start_trace_probability says no), or a new trace is started;
+///  - otherwise the thread context is initialized from the parent context and owned by this holder.
+///
+/// _parent_trace_context is taken by value because it may be modified locally when a new trace is started.
+TracingContextHolder::TracingContextHolder(
+    std::string_view _operation_name,
+    TracingContext _parent_trace_context,
+    const Settings * settings_ptr,
+    const std::weak_ptr<OpenTelemetrySpanLog> & _span_log)
+{
+    if (current_thread_trace_context.isTraceEnabled())
+    {
+        ///
+        /// This is not the normal case,
+        /// it means that construction of current object is not at the start of current thread.
+        /// Usually this is due to:
+        ///    1. bad design
+        ///    2. right design but code changes so that original point where this object is constructing is not the new start execution of current thread
+        ///
+        /// In such case, we should use current context as parent of this new constructing object,
+        /// So this branch ensures this class can be instantiated multiple times on one same thread safely.
+        ///
+        this->is_context_owner = false;
+        this->root_span.trace_id = current_thread_trace_context.trace_id;
+        this->root_span.parent_span_id = current_thread_trace_context.span_id;
+        this->root_span.span_id = thread_local_rng();
+        this->root_span.operation_name = _operation_name;
+        this->root_span.start_time_us
+            = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
+
+        /// The destructor restores the previous span id from root_span.parent_span_id.
+        current_thread_trace_context.span_id = this->root_span.span_id;
+        return;
+    }
+
+    if (!_parent_trace_context.isTraceEnabled())
+    {
+        if (settings_ptr == nullptr)
+            /// skip tracing context initialization on current thread
+            return;
+
+        // start the trace ourselves, with some configurable probability.
+        std::bernoulli_distribution should_start_trace{settings_ptr->opentelemetry_start_trace_probability};
+        if (!should_start_trace(thread_local_rng))
+            /// skip tracing context initialization on current thread
+            return;
+
+        while (_parent_trace_context.trace_id == UUID())
+        {
+            // make sure the random generated trace_id is not 0 which is an invalid id
+            _parent_trace_context.trace_id.toUnderType().items[0] = thread_local_rng(); //-V656
+            _parent_trace_context.trace_id.toUnderType().items[1] = thread_local_rng(); //-V656
+        }
+        /// A trace started here has no parent span.
+        _parent_trace_context.span_id = 0;
+    }
+
+    this->root_span.trace_id = _parent_trace_context.trace_id;
+    this->root_span.parent_span_id = _parent_trace_context.span_id;
+    this->root_span.span_id = thread_local_rng();
+    this->root_span.operation_name = _operation_name;
+    this->root_span.start_time_us
+        = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
+
+    /// This object is created to initialize tracing context on a new thread,
+    /// it's helpful to record the thread_id so that we know the thread switching from the span log
+    this->root_span.addAttribute("clickhouse.thread_id", getThreadId());
+
+    /// set up trace context on current thread
+    /// (the assignment copies only the TracingContext part; span_id, trace_flags and span_log are set explicitly below)
+    current_thread_trace_context = _parent_trace_context;
+    current_thread_trace_context.span_id = this->root_span.span_id;
+    current_thread_trace_context.trace_flags = TRACE_FLAG_SAMPLED;
+    current_thread_trace_context.span_log = _span_log;
+}
+
+/// Write the root span to the span log (when the log is still available) and undo what the
+/// constructor did to the thread context: an owner clears the whole context, a non-owner only
+/// restores the span id that was current before.
+TracingContextHolder::~TracingContextHolder()
+{
+    /// Nothing was initialized by the constructor.
+    if (!root_span.isTraceEnabled())
+        return;
+
+    try
+    {
+        if (auto log = current_thread_trace_context.span_log.lock())
+        {
+            const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
+            root_span.finish_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
+
+            log->add(OpenTelemetrySpanLogElement(root_span));
+        }
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__FUNCTION__);
+    }
+
+    root_span.trace_id = UUID();
+
+    if (!is_context_owner)
+    {
+        current_thread_trace_context.span_id = root_span.parent_span_id;
+        return;
+    }
+
+    /// Clear the context on current thread
+    current_thread_trace_context.reset();
+}
+
+}
+}
--- a/src/Common/OpenTelemetryTraceContext.h
+++ b/src/Common/OpenTelemetryTraceContext.h
@ -1,24 +1,161 @@
 #pragma once

-#include <base/types.h>
-#include <base/UUID.h>
+#include <Core/Field.h>

 namespace DB
 {

-// The runtime info we need to create new OpenTelemetry spans.
-struct OpenTelemetryTraceContext
+struct Settings;
+class OpenTelemetrySpanLog;
+
+namespace OpenTelemetry
+{
+
+/// Data of a single span: its identity within a trace, its name, timing and attributes.
+struct Span
+{
+    /// Id of the trace this span belongs to. A default-constructed (zero) id means tracing is disabled.
+    UUID trace_id{};
+    UInt64 span_id = 0;
+    UInt64 parent_span_id = 0;
+    String operation_name;
+    /// Start and finish timestamps in microseconds.
+    UInt64 start_time_us = 0;
+    UInt64 finish_time_us = 0;
+    /// Collected attributes.
+    Map attributes;
+
+    /// The addAttribute* methods below are declared here and defined out of line.
+    void addAttribute(std::string_view name, UInt64 value);
+    void addAttributeIfNotZero(std::string_view name, UInt64 value);
+    void addAttribute(std::string_view name, std::string_view value);
+    void addAttributeIfNotEmpty(std::string_view name, std::string_view value);
+    /// The value is produced by calling value_supplier.
+    void addAttribute(std::string_view name, std::function<String()> value_supplier);
+
+    /// Following two methods are declared as noexcept to make sure they're exception safe
+    /// This is because they're usually called in exception handler
+    void addAttribute(const Exception & e) noexcept;
+    void addAttribute(std::exception_ptr e) noexcept;
+
+    /// True when the span carries a non-zero trace id.
+    bool isTraceEnabled() const
+    {
+        return trace_id != UUID();
+    }
+};
+
+/// See https://www.w3.org/TR/trace-context/ for trace_flags definition
+enum TraceFlags : UInt8
+{
+    /// No flag is set.
+    TRACE_FLAG_NONE = 0,
+    /// NOTE(review): presumably the "sampled" bit of the W3C trace-flags field - confirm against the spec.
+    TRACE_FLAG_SAMPLED = 1,
+};
+
+/// The runtime info we need to create new OpenTelemetry spans.
+struct TracingContext
 {
    UUID trace_id{};
    UInt64 span_id = 0;
    // The incoming tracestate header and the trace flags, we just pass them
    // downstream. See https://www.w3.org/TR/trace-context/
    String tracestate;
-    UInt8 trace_flags = 0;
+    UInt8 trace_flags = TRACE_FLAG_NONE;

    // Parse/compose OpenTelemetry traceparent header.
-    bool parseTraceparentHeader(const std::string & traceparent, std::string & error);
-    std::string composeTraceparentHeader() const;
+    bool parseTraceparentHeader(std::string_view traceparent, String & error);
+    String composeTraceparentHeader() const;
+
+    bool isTraceEnabled() const
+    {
+        return trace_id != UUID();
+    }
+};
+
+/// Tracing context kept on each thread
+struct TracingContextOnThread : TracingContext
+{
+    /// Assign only the TracingContext part of this object; span_log is left unchanged.
+    TracingContextOnThread& operator =(const TracingContext& context)
+    {
+        *(static_cast<TracingContext*>(this)) = context;
+        return *this;
+    }
+
+    void reset();
+
+    /// Use weak_ptr instead of shared_ptr to hold a reference to the underlying system.opentelemetry_span_log table
+    /// Since this object is kept on threads and passed across threads, a weak_ptr is more safe to prevent potential leak
+    std::weak_ptr<OpenTelemetrySpanLog> span_log;
+};
+
+/// Get tracing context on current thread
+const TracingContextOnThread& CurrentContext();
+
+/// Holder of tracing context.
+/// It should be initialized at the beginning of each thread execution.
+/// And once it's destructed, it clears the context automatically.
+///
+/// It's also the root of all spans on current thread execution.
+///
+/// Although it's SAFE to construct this object multiple times on one same thread, it should be created at the beginning of one thread execution.
+struct TracingContextHolder
+{
+    /// Copy construction and copy assignment are forbidden to make the destructor safe
+    TracingContextHolder(const TracingContextHolder& scope) = delete;
+    TracingContextHolder& operator =(const TracingContextHolder& scope) = delete;
+
+    /// Initialize a tracing context from an explicit parent context and span log.
+    /// No settings are passed on (nullptr), i.e. this overload delegates without a Settings object.
+    TracingContextHolder(std::string_view _operation_name,
+        const TracingContext& _parent_trace_context,
+        const std::weak_ptr<OpenTelemetrySpanLog>& _log)
+        : TracingContextHolder(_operation_name,
+            _parent_trace_context,
+            nullptr,
+            _log)
+    {
+    }
+
+    /// Initialize a tracing context on a child thread based on the context from the parent thread
+    TracingContextHolder(std::string_view _operation_name, const TracingContextOnThread & _parent_thread_trace_context)
+        : TracingContextHolder(_operation_name,
+            _parent_thread_trace_context,
+            nullptr,
+            _parent_thread_trace_context.span_log)
+    {
+    }
+
+    /// For servers like HTTP/TCP/GRPC to initialize tracing context on thread that process requests from clients
+    TracingContextHolder(std::string_view _operation_name,
+        TracingContext _parent_trace_context,
+        const Settings & _settings,
+        const std::weak_ptr<OpenTelemetrySpanLog> & _log)
+        : TracingContextHolder(_operation_name,
+            _parent_trace_context,
+            &_settings,
+            _log)
+    {
+    }
+
+    /// The constructor all other overloads delegate to; settings_ptr may be nullptr.
+    TracingContextHolder(std::string_view _operation_name,
+        TracingContext _parent_trace_context,
+        const Settings* settings_ptr,
+        const std::weak_ptr<OpenTelemetrySpanLog> & _log);
+
+    ~TracingContextHolder();
+
+    /// The span created by this holder; public so that callers can add attributes or rename the operation.
+    Span root_span;
+
+private:
+    /// Defaults to true.
+    /// NOTE(review): presumably decides whether the destructor clears the whole thread context - confirm in the .cpp.
+    bool is_context_owner = true;
+};
+
+using TracingContextHolderPtr = std::unique_ptr<TracingContextHolder>;
+
+/// A span holder that creates a span automatically in a (function) scope if tracing is enabled.
+/// Once it's created or destructed, it automatically maintains the tracing context on the thread that it lives on.
+struct SpanHolder : public Span
+{
+    /// The argument is the operation name of the new span.
+    SpanHolder(std::string_view);
+    ~SpanHolder();
+
+    /// Finish a span explicitly if needed.
+    /// It's safe to call it multiple times
+    void finish() noexcept;
 };

 }
+
+}
+
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -318,6 +318,7 @@ The server successfully detected this situation and will download merged part fr
    \
    M(FileSegmentWaitReadBufferMicroseconds, "Metric per file segment. Time spend waiting for internal read buffer (includes cache waiting)") \
    M(FileSegmentReadMicroseconds, "Metric per file segment. Time spend reading from file") \
+    M(FileSegmentWriteMicroseconds, "Metric per file segment. Time spend writing cache") \
    M(FileSegmentCacheWriteMicroseconds, "Metric per file segment. Time spend writing data to cache") \
    M(FileSegmentPredownloadMicroseconds, "Metric per file segment. Time spent predownloading data to cache (predownloading - finishing file segment download (after someone who failed to do that) up to the point current thread was requested to do)") \
    M(FileSegmentUsedBytes, "Metric per file segment. How many bytes were actually used from current file segment") \
--- a/src/Common/SLRUCachePolicy.h
+++ b/src/Common/SLRUCachePolicy.h
@ -33,7 +33,7 @@ public:
      * max_protected_size shows how many of the most frequently used entries will not be evicted after a sequential scan.
      * max_protected_size == 0 means that the default protected size is equal to half of the total max size.
      */
-    /// TODO: construct from special struct with cache policy parametrs (also with max_protected_size).
+    /// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
    SLRUCachePolicy(size_t max_size_, size_t max_elements_size_ = 0, double size_ratio = 0.5, OnWeightLossFunction on_weight_loss_function_ = {})
        : max_protected_size(max_size_ * std::min(1.0, size_ratio))
        , max_size(max_size_)
--- a/src/Common/Stopwatch.h
+++ b/src/Common/Stopwatch.h
@ -31,7 +31,7 @@ inline UInt64 clock_gettime_ns_adjusted(UInt64 prev_time, clockid_t clock_type =
 }

 /** Differs from Poco::Stopwatch only by using 'clock_gettime' instead of 'gettimeofday',
-  *  returns nanoseconds instead of microseconds, and also by other minor differencies.
+  *  returns nanoseconds instead of microseconds, and also by other minor differences.
  */
 class Stopwatch
 {
@ -152,4 +152,3 @@ private:
    /// Most significant bit is a lock. When it is set, compareAndRestartDeferred method will return false.
    UInt64 nanoseconds(UInt64 prev_time) const { return clock_gettime_ns_adjusted(prev_time, clock_type) & 0x7FFFFFFFFFFFFFFFULL; }
 };
-
--- a/src/Common/ThreadPool.cpp
+++ b/src/Common/ThreadPool.cpp
@ -2,6 +2,7 @@
 #include <Common/setThreadName.h>
 #include <Common/Exception.h>
 #include <Common/getNumberOfPhysicalCPUCores.h>
+#include <Common/OpenTelemetryTraceContext.h>

 #include <cassert>
 #include <iostream>
@ -86,7 +87,7 @@ void ThreadPoolImpl<Thread>::setQueueSize(size_t value)

 template <typename Thread>
 template <typename ReturnType>
-ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, int priority, std::optional<uint64_t> wait_microseconds)
+ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, int priority, std::optional<uint64_t> wait_microseconds, bool propagate_opentelemetry_tracing_context)
 {
    auto on_error = [&](const std::string & reason)
    {
@ -149,7 +150,11 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, int priority, std::opti
            }
        }

-        jobs.emplace(std::move(job), priority);
+        jobs.emplace(std::move(job),
+                     priority,
+                     /// Tracing context on this thread is used as parent context for the sub-thread that runs the job
+                     propagate_opentelemetry_tracing_context ? DB::OpenTelemetry::CurrentContext() : DB::OpenTelemetry::TracingContextOnThread());
+
        ++scheduled_jobs;
        new_job_or_shutdown.notify_one();
    }
@ -170,9 +175,9 @@ bool ThreadPoolImpl<Thread>::trySchedule(Job job, int priority, uint64_t wait_mi
 }

 template <typename Thread>
-void ThreadPoolImpl<Thread>::scheduleOrThrow(Job job, int priority, uint64_t wait_microseconds)
+void ThreadPoolImpl<Thread>::scheduleOrThrow(Job job, int priority, uint64_t wait_microseconds, bool propagate_opentelemetry_tracing_context)
 {
-    scheduleImpl<void>(std::move(job), priority, wait_microseconds);
+    scheduleImpl<void>(std::move(job), priority, wait_microseconds, propagate_opentelemetry_tracing_context);
 }

 template <typename Thread>
@ -250,6 +255,9 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
        Job job;
        bool need_shutdown = false;

+        /// A copy of parent trace context
+        DB::OpenTelemetry::TracingContextOnThread parent_thead_trace_context;
+
        {
            std::unique_lock lock(mutex);
            new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); });
@ -260,6 +268,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
                /// boost::priority_queue does not provide interface for getting non-const reference to an element
                /// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
                job = std::move(const_cast<Job &>(jobs.top().job));
+                parent_thead_trace_context = std::move(const_cast<DB::OpenTelemetry::TracingContextOnThread &>(jobs.top().thread_trace_context));
                jobs.pop();
            }
            else
@ -272,22 +281,40 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_

        if (!need_shutdown)
        {
+            ALLOW_ALLOCATIONS_IN_SCOPE;
+
+            /// Set up tracing context for this thread by its parent context
+            DB::OpenTelemetry::TracingContextHolder thread_trace_context("ThreadPool::worker()", parent_thead_trace_context);
+
            try
            {
-                ALLOW_ALLOCATIONS_IN_SCOPE;
                CurrentMetrics::Increment metric_active_threads(
                    std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);

                job();
+
+                if (thread_trace_context.root_span.isTraceEnabled())
+                {
+                    /// Use the thread name as operation name so that the tracing log will be more clear.
+                    /// The thread name is usually set in the jobs, we can only get the name after the job finishes
+                    std::string thread_name = getThreadName();
+                    if (!thread_name.empty())
+                        thread_trace_context.root_span.operation_name = thread_name;
+                }
+
                /// job should be reset before decrementing scheduled_jobs to
                /// ensure that the Job destroyed before wait() returns.
                job = {};
+                parent_thead_trace_context.reset();
            }
            catch (...)
            {
+                thread_trace_context.root_span.addAttribute(std::current_exception());
+
                /// job should be reset before decrementing scheduled_jobs to
                /// ensure that the Job destroyed before wait() returns.
                job = {};
+                parent_thead_trace_context.reset();

                {
                    std::lock_guard lock(mutex);
@ -323,7 +350,8 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_


 template class ThreadPoolImpl<std::thread>;
-template class ThreadPoolImpl<ThreadFromGlobalPool>;
+template class ThreadPoolImpl<ThreadFromGlobalPoolImpl<false>>;
+template class ThreadFromGlobalPoolImpl<true>;

 std::unique_ptr<GlobalThreadPool> GlobalThreadPool::the_instance;

--- a/src/Common/ThreadPool.h
+++ b/src/Common/ThreadPool.h
@ -14,6 +14,7 @@

 #include <Poco/Event.h>
 #include <Common/ThreadStatus.h>
+#include <Common/OpenTelemetryTraceContext.h>
 #include <base/scope_guard.h>

 /** Very simple thread pool similar to boost::threadpool.
@ -55,7 +56,7 @@ public:
    bool trySchedule(Job job, int priority = 0, uint64_t wait_microseconds = 0) noexcept;

    /// Similar to scheduleOrThrowOnError(...). Wait for specified amount of time and schedule a job or throw an exception.
-    void scheduleOrThrow(Job job, int priority = 0, uint64_t wait_microseconds = 0);
+    void scheduleOrThrow(Job job, int priority = 0, uint64_t wait_microseconds = 0, bool propagate_opentelemetry_tracing_context = true);

    /// Wait for all currently active jobs to be done.
    /// You may call schedule and wait many times in arbitrary order.
@ -96,9 +97,10 @@ private:
    {
        Job job;
        int priority;
+        DB::OpenTelemetry::TracingContextOnThread thread_trace_context;

-        JobWithPriority(Job job_, int priority_)
-            : job(job_), priority(priority_) {}
+        JobWithPriority(Job job_, int priority_, const DB::OpenTelemetry::TracingContextOnThread& thread_trace_context_)
+            : job(job_), priority(priority_), thread_trace_context(thread_trace_context_) {}

        bool operator< (const JobWithPriority & rhs) const
        {
@ -111,7 +113,7 @@ private:
    std::exception_ptr first_exception;

    template <typename ReturnType>
-    ReturnType scheduleImpl(Job job, int priority, std::optional<uint64_t> wait_microseconds);
+    ReturnType scheduleImpl(Job job, int priority, std::optional<uint64_t> wait_microseconds, bool propagate_opentelemetry_tracing_context = true);

    void worker(typename std::list<Thread>::iterator thread_it);

@ -154,14 +156,18 @@ public:

 /** Looks like std::thread but allocates threads in GlobalThreadPool.
  * Also holds ThreadStatus for ClickHouse.
+  *
+  * NOTE: User code should use 'ThreadFromGlobalPool' declared below instead of directly using this class.
+  *
  */
-class ThreadFromGlobalPool : boost::noncopyable
+template <bool propagate_opentelemetry_context = true>
+class ThreadFromGlobalPoolImpl : boost::noncopyable
 {
 public:
-    ThreadFromGlobalPool() = default;
+    ThreadFromGlobalPoolImpl() = default;

    template <typename Function, typename... Args>
-    explicit ThreadFromGlobalPool(Function && func, Args &&... args)
+    explicit ThreadFromGlobalPoolImpl(Function && func, Args &&... args)
        : state(std::make_shared<State>())
    {
        /// NOTE:
@ -185,15 +191,19 @@ public:
            /// before sending signal that permits to join this thread.
            DB::ThreadStatus thread_status;
            std::apply(function, arguments);
-        });
+        },
+        0, // default priority
+        0, // default wait_microseconds
+        propagate_opentelemetry_context
+        );
    }

-    ThreadFromGlobalPool(ThreadFromGlobalPool && rhs) noexcept
+    ThreadFromGlobalPoolImpl(ThreadFromGlobalPoolImpl && rhs) noexcept
    {
        *this = std::move(rhs);
    }

-    ThreadFromGlobalPool & operator=(ThreadFromGlobalPool && rhs) noexcept
+    ThreadFromGlobalPoolImpl & operator=(ThreadFromGlobalPoolImpl && rhs) noexcept
    {
        if (initialized())
            abort();
@ -201,7 +211,7 @@ public:
        return *this;
    }

-    ~ThreadFromGlobalPool()
+    ~ThreadFromGlobalPoolImpl()
    {
        if (initialized())
            abort();
@ -233,7 +243,7 @@ public:
        return true;
    }

-private:
+protected:
    struct State
    {
        /// Should be atomic() because of possible concurrent access between
@ -254,6 +264,19 @@ private:
    }
 };

-
 /// Recommended thread pool for the case when multiple thread pools are created and destroyed.
-using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPool>;
+///
+/// The template parameter of ThreadFromGlobalPool is set to false to disable tracing context propagation to underlying worker.
+/// Because ThreadFromGlobalPool schedules a job upon GlobalThreadPool, this means there will be two workers to schedule a job in 'ThreadPool',
+/// one is at GlobalThreadPool level, the other is at ThreadPool level, so tracing context will be initialized on the same thread twice.
+///
+/// Once the worker on ThreadPool gains the control of execution, it won't return until it's shutdown,
+/// which means the tracing context initialized at underlying worker level won't be deleted for a very long time.
+/// This would cause wrong context for further jobs scheduled in ThreadPool.
+///
+/// To make sure the tracing context is correctly propagated, we explicitly disable context propagation (including initialization and de-initialization) at underlying worker level.
+///
+using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPoolImpl<false>>;
+
+/// An alias for user code to execute a job in the global thread pool
+using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl<true>;
--- a/src/Common/ThreadStatus.cpp
+++ b/src/Common/ThreadStatus.cpp
@ -3,7 +3,6 @@
 #include <Common/QueryProfiler.h>
 #include <Common/ThreadStatus.h>
 #include <base/errnoToString.h>
-#include <Interpreters/OpenTelemetrySpanLog.h>
 #include <Interpreters/Context.h>

 #include <Poco/Logger.h>
--- a/src/Common/ThreadStatus.h
+++ b/src/Common/ThreadStatus.h
@ -4,7 +4,6 @@
 #include <Interpreters/Context_fwd.h>
 #include <IO/Progress.h>
 #include <Common/MemoryTracker.h>
-#include <Common/OpenTelemetryTraceContext.h>
 #include <Common/ProfileEvents.h>
 #include <base/StringRef.h>
 #include <Common/ConcurrentBoundedQueue.h>
@ -33,7 +32,6 @@ class ThreadStatus;
 class QueryProfilerReal;
 class QueryProfilerCPU;
 class QueryThreadLog;
-struct OpenTelemetrySpanHolder;
 class TasksStatsCounters;
 struct RUsageCounters;
 struct PerfEventsCounters;
@ -135,8 +133,6 @@ public:
    Int64 untracked_memory = 0;
    /// Each thread could new/delete memory in range of (-untracked_memory_limit, untracked_memory_limit) without access to common counters.
    Int64 untracked_memory_limit = 4 * 1024 * 1024;
-    /// Increase limit in case of exception.
-    Int64 untracked_memory_limit_increase = 0;

    /// Statistics of read and write rows/bytes
    Progress progress_in;
@ -145,12 +141,6 @@ public:
    using Deleter = std::function<void()>;
    Deleter deleter;

-    // This is the current most-derived OpenTelemetry span for this thread. It
-    // can be changed throughout the query execution, whenever we enter a new
-    // span or exit it. See OpenTelemetrySpanHolder that is normally responsible
-    // for these changes.
-    OpenTelemetryTraceContext thread_trace_context;
-
 protected:
    ThreadGroupStatusPtr thread_group;

--- a/src/Common/Volnitsky.h
+++ b/src/Common/Volnitsky.h
@ -497,7 +497,7 @@ private:
    /// last index of offsets that was not processed
    size_t last;

-    /// limit for adding to hashtable. In worst case with case insentive search, the table will be filled at most as half
+    /// limit for adding to hashtable. In worst case with case insensitive search, the table will be filled at most as half
    static constexpr size_t small_limit = VolnitskyTraits::hash_size / 8;

 public:
--- a/src/Common/tests/gtest_lru_file_cache.cpp
+++ b/src/Common/tests/gtest_lru_file_cache.cpp
@ -1,514 +0,0 @@
-#include <iomanip>
-#include <iostream>
-#include <gtest/gtest.h>
-#include <Common/FileCache.h>
-#include <Common/FileSegment.h>
-#include <Common/CurrentThread.h>
-#include <Common/filesystemHelpers.h>
-#include <Common/FileCacheSettings.h>
-#include <Common/tests/gtest_global_context.h>
-#include <Common/SipHash.h>
-#include <Common/hex.h>
-#include <Interpreters/Context.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteHelpers.h>
-#include <filesystem>
-#include <thread>
-
-namespace fs = std::filesystem;
-
-fs::path caches_dir = fs::current_path() / "lru_cache_test";
-String cache_base_path = caches_dir / "cache1" / "";
-
-void assertRange(
-    [[maybe_unused]] size_t assert_n, DB::FileSegmentPtr file_segment,
-    const DB::FileSegment::Range & expected_range, DB::FileSegment::State expected_state)
-{
-    auto range = file_segment->range();
-
-    std::cerr << fmt::format("\nAssert #{} : {} == {} (state: {} == {})\n", assert_n,
-                             range.toString(), expected_range.toString(),
-                             toString(file_segment->state()), toString(expected_state));
-
-    ASSERT_EQ(range.left, expected_range.left);
-    ASSERT_EQ(range.right, expected_range.right);
-    ASSERT_EQ(file_segment->state(), expected_state);
-}
-
-void printRanges(const auto & segments)
-{
-    std::cerr << "\nHaving file segments: ";
-    for (const auto & segment : segments)
-        std::cerr << '\n' << segment->range().toString() << " (state: " + DB::FileSegment::stateToString(segment->state()) + ")" << "\n";
-}
-
-std::vector<DB::FileSegmentPtr> fromHolder(const DB::FileSegmentsHolder & holder)
-{
-    return std::vector<DB::FileSegmentPtr>(holder.file_segments.begin(), holder.file_segments.end());
-}
-
-String getFileSegmentPath(const String & base_path, const DB::FileCache::Key & key, size_t offset)
-{
-    auto key_str = key.toString();
-    return fs::path(base_path) / key_str.substr(0, 3) / key_str / DB::toString(offset);
-}
-
-void download(DB::FileSegmentPtr file_segment)
-{
-    const auto & key = file_segment->key();
-    size_t size = file_segment->range().size();
-
-    auto key_str = key.toString();
-    auto subdir = fs::path(cache_base_path) / key_str.substr(0, 3) / key_str;
-    if (!fs::exists(subdir))
-        fs::create_directories(subdir);
-
-    std::string data(size, '0');
-    file_segment->write(data.data(), size, file_segment->getDownloadOffset());
-}
-
-void prepareAndDownload(DB::FileSegmentPtr file_segment)
-{
-    // std::cerr << "Reserving: " << file_segment->range().size() << " for: " << file_segment->range().toString() << "\n";
-    ASSERT_TRUE(file_segment->reserve(file_segment->range().size()));
-    download(file_segment);
-}
-
-void complete(const DB::FileSegmentsHolder & holder)
-{
-    for (const auto & file_segment : holder.file_segments)
-    {
-        ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        prepareAndDownload(file_segment);
-        file_segment->completeWithState(DB::FileSegment::State::DOWNLOADED);
-    }
-}
-
-
-TEST(FileCache, get)
-{
-    if (fs::exists(cache_base_path))
-        fs::remove_all(cache_base_path);
-    fs::create_directories(cache_base_path);
-
-    DB::ThreadStatus thread_status;
-
-    /// To work with cache need query_id and query context.
-    std::string query_id = "query_id";
-    auto query_context = DB::Context::createCopy(getContext().context);
-    query_context->makeQueryContext();
-    query_context->setCurrentQueryId(query_id);
-    DB::CurrentThread::QueryScope query_scope_holder(query_context);
-
-    DB::FileCacheSettings settings;
-    settings.max_size = 30;
-    settings.max_elements = 5;
-    auto cache = DB::FileCache(cache_base_path, settings);
-    cache.initialize();
-    auto key = cache.hash("key1");
-
-    {
-        auto holder = cache.getOrSet(key, 0, 10, false);  /// Add range [0, 9]
-        auto segments = fromHolder(holder);
-        /// Range was not present in cache. It should be added in cache as one while file segment.
-        ASSERT_EQ(segments.size(), 1);
-
-        assertRange(1, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::EMPTY);
-
-        /// Exception because space not reserved.
-        /// EXPECT_THROW(download(segments[0]), DB::Exception);
-        /// Exception because space can be reserved only by downloader
-        /// EXPECT_THROW(segments[0]->reserve(segments[0]->range().size()), DB::Exception);
-
-        ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(segments[0]->reserve(segments[0]->range().size()));
-        assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING);
-
-        download(segments[0]);
-        segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-        assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
-    }
-
-    /// Current cache:    [__________]
-    ///                   ^          ^
-    ///                   0          9
-    ASSERT_EQ(cache.getFileSegmentsNum(), 1);
-    ASSERT_EQ(cache.getUsedCacheSize(), 10);
-
-    {
-        /// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
-        auto holder = cache.getOrSet(key, 5, 10, false);
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 2);
-
-        assertRange(4, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
-        assertRange(5, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::EMPTY);
-
-        ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        prepareAndDownload(segments[1]);
-        segments[1]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-        assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
-    }
-
-    /// Current cache:    [__________][_____]
-    ///                   ^          ^^     ^
-    ///                   0          910    14
-    ASSERT_EQ(cache.getFileSegmentsNum(), 2);
-    ASSERT_EQ(cache.getUsedCacheSize(), 15);
-
-    {
-        auto holder = cache.getOrSet(key, 9, 1, false);  /// Get [9, 9]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 1);
-        assertRange(7, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
-    }
-
-    {
-        auto holder = cache.getOrSet(key, 9, 2, false);  /// Get [9, 10]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 2);
-        assertRange(8, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
-        assertRange(9, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
-    }
-
-    {
-        auto holder = cache.getOrSet(key, 10, 1, false);  /// Get [10, 10]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 1);
-        assertRange(10, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
-    }
-
-    complete(cache.getOrSet(key, 17, 4, false)); /// Get [17, 20]
-    complete(cache.getOrSet(key, 24, 3, false)); /// Get [24, 26]
-    /// complete(cache.getOrSet(key, 27, 1, false)); /// Get [27, 27]
-
-    /// Current cache:    [__________][_____]   [____]    [___][]
-    ///                   ^          ^^     ^   ^    ^    ^   ^^^
-    ///                   0          910    14  17   20   24  2627
-    ///
-    ASSERT_EQ(cache.getFileSegmentsNum(), 4);
-    ASSERT_EQ(cache.getUsedCacheSize(), 22);
-
-    {
-        auto holder = cache.getOrSet(key, 0, 26, false); /// Get [0, 25]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 6);
-
-        assertRange(11, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
-        assertRange(12, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
-
-        /// Missing [15, 16] should be added in cache.
-        assertRange(13, segments[2], DB::FileSegment::Range(15, 16), DB::FileSegment::State::EMPTY);
-
-        ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        prepareAndDownload(segments[2]);
-
-        segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-
-        assertRange(14, segments[3], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED);
-
-        /// New [21, 23], but will not be added in cache because of elements limit (5)
-        assertRange(15, segments[4], DB::FileSegment::Range(21, 23), DB::FileSegment::State::EMPTY);
-        ASSERT_TRUE(segments[4]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_FALSE(segments[4]->reserve(1));
-
-        assertRange(16, segments[5], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
-
-        /// Current cache:    [__________][_____][   ][____]    [___]
-        ///                   ^                            ^    ^
-        ///                   0                            20   24
-        ///
-
-        /// Range [27, 27] must be evicted in previous getOrSet [0, 25].
-        /// Let's not invalidate pointers to returned segments from range [0, 25] and
-        /// as max elements size is reached, next attempt to put something in cache should fail.
-        /// This will also check that [27, 27] was indeed evicted.
-
-        auto holder1 = cache.getOrSet(key, 27, 1, false);
-        auto segments_1 = fromHolder(holder1); /// Get [27, 27]
-        ASSERT_EQ(segments_1.size(), 1);
-        assertRange(17, segments_1[0], DB::FileSegment::Range(27, 27), DB::FileSegment::State::EMPTY);
-    }
-
-    {
-        auto holder = cache.getOrSet(key, 12, 10, false); /// Get [12, 21]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 4);
-
-        assertRange(18, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
-        assertRange(19, segments[1], DB::FileSegment::Range(15, 16), DB::FileSegment::State::DOWNLOADED);
-        assertRange(20, segments[2], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED);
-
-        assertRange(21, segments[3], DB::FileSegment::Range(21, 21), DB::FileSegment::State::EMPTY);
-
-        ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        prepareAndDownload(segments[3]);
-
-        segments[3]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-        ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED);
-    }
-
-    /// Current cache:    [_____][__][____][_]   [___]
-    ///                   ^          ^       ^   ^   ^
-    ///                   10         17      21  24  26
-
-    ASSERT_EQ(cache.getFileSegmentsNum(), 5);
-
-    {
-        auto holder = cache.getOrSet(key, 23, 5, false); /// Get [23, 28]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 3);
-
-        assertRange(22, segments[0], DB::FileSegment::Range(23, 23), DB::FileSegment::State::EMPTY);
-        assertRange(23, segments[1], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
-        assertRange(24, segments[2], DB::FileSegment::Range(27, 27), DB::FileSegment::State::EMPTY);
-
-        ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        prepareAndDownload(segments[0]);
-        prepareAndDownload(segments[2]);
-        segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-        segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-    }
-
-    /// Current cache:    [____][_]  [][___][__]
-    ///                   ^       ^  ^^^   ^^  ^
-    ///                   17      21 2324  26  28
-
-    {
-        auto holder5 = cache.getOrSet(key, 2, 3,false); /// Get [2, 4]
-        auto s5 = fromHolder(holder5);
-        ASSERT_EQ(s5.size(), 1);
-        assertRange(25, s5[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::EMPTY);
-
-        auto holder1 = cache.getOrSet(key, 30, 2, false); /// Get [30, 31]
-        auto s1 = fromHolder(holder1);
-        ASSERT_EQ(s1.size(), 1);
-        assertRange(26, s1[0], DB::FileSegment::Range(30, 31), DB::FileSegment::State::EMPTY);
-
-        ASSERT_TRUE(s5[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        prepareAndDownload(s5[0]);
-        prepareAndDownload(s1[0]);
-        s5[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-        s1[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-
-        /// Current cache:    [___]       [_][___][_]   [__]
-        ///                   ^   ^       ^  ^   ^  ^   ^  ^
-        ///                   2   4       23 24  26 27  30 31
-
-        auto holder2 = cache.getOrSet(key, 23, 1, false); /// Get [23, 23]
-        auto s2 = fromHolder(holder2);
-        ASSERT_EQ(s2.size(), 1);
-
-        auto holder3 = cache.getOrSet(key, 24, 3, false); /// Get [24, 26]
-        auto s3 = fromHolder(holder3);
-        ASSERT_EQ(s3.size(), 1);
-
-        auto holder4 = cache.getOrSet(key, 27, 1, false); /// Get [27, 27]
-        auto s4 = fromHolder(holder4);
-        ASSERT_EQ(s4.size(), 1);
-
-        /// All cache is now unreleasable because pointers are still hold
-        auto holder6 = cache.getOrSet(key, 0, 40, false);
-        auto f = fromHolder(holder6);
-        ASSERT_EQ(f.size(), 9);
-
-        assertRange(27, f[0], DB::FileSegment::Range(0, 1), DB::FileSegment::State::EMPTY);
-        assertRange(28, f[2], DB::FileSegment::Range(5, 22), DB::FileSegment::State::EMPTY);
-        assertRange(29, f[6], DB::FileSegment::Range(28, 29), DB::FileSegment::State::EMPTY);
-        assertRange(30, f[8], DB::FileSegment::Range(32, 39), DB::FileSegment::State::EMPTY);
-
-        ASSERT_TRUE(f[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(f[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(f[6]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(f[8]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-
-        ASSERT_FALSE(f[0]->reserve(1));
-        ASSERT_FALSE(f[2]->reserve(1));
-        ASSERT_FALSE(f[6]->reserve(1));
-        ASSERT_FALSE(f[8]->reserve(1));
-    }
-
-    {
-        auto holder = cache.getOrSet(key, 2, 3, false); /// Get [2, 4]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 1);
-        assertRange(31, segments[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
-    }
-
-    /// Current cache:    [___]       [_][___][_]   [__]
-    ///                   ^   ^       ^  ^   ^  ^   ^  ^
-    ///                   2   4       23 24  26 27  30 31
-
-    {
-        auto holder = cache.getOrSet(key, 25, 5, false); /// Get [25, 29]
-        auto segments = fromHolder(holder);
-        ASSERT_EQ(segments.size(), 3);
-
-        assertRange(32, segments[0], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
-        assertRange(33, segments[1], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
-
-        assertRange(34, segments[2], DB::FileSegment::Range(28, 29), DB::FileSegment::State::EMPTY);
-        ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADING);
-
-        bool lets_start_download = false;
-        std::mutex mutex;
-        std::condition_variable cv;
-
-        std::thread other_1([&]
-        {
-            DB::ThreadStatus thread_status_1;
-            auto query_context_1 = DB::Context::createCopy(getContext().context);
-            query_context_1->makeQueryContext();
-            query_context_1->setCurrentQueryId("query_id_1");
-            DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);
-            thread_status_1.attachQueryContext(query_context_1);
-
-            auto holder_2 = cache.getOrSet(key, 25, 5, false); /// Get [25, 29] once again.
-            auto segments_2 = fromHolder(holder_2);
-            ASSERT_EQ(segments.size(), 3);
-
-            assertRange(35, segments_2[0], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
-            assertRange(36, segments_2[1], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
-            assertRange(37, segments_2[2], DB::FileSegment::Range(28, 29), DB::FileSegment::State::DOWNLOADING);
-
-            ASSERT_TRUE(segments[2]->getOrSetDownloader() != DB::FileSegment::getCallerId());
-            ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADING);
-
-            {
-                std::lock_guard lock(mutex);
-                lets_start_download = true;
-            }
-            cv.notify_one();
-
-            segments_2[2]->wait();
-            ASSERT_TRUE(segments_2[2]->state() == DB::FileSegment::State::DOWNLOADED);
-        });
-
-        {
-            std::unique_lock lock(mutex);
-            cv.wait(lock, [&]{ return lets_start_download; });
-        }
-
-        prepareAndDownload(segments[2]);
-        segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-        ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED);
-
-        other_1.join();
-    }
-
-    /// Current cache:    [___]       [___][_][__][__]
-    ///                   ^   ^       ^   ^  ^^  ^^  ^
-    ///                   2   4       24  26 27  2930 31
-
-    {
-        /// Now let's check the similar case but getting ERROR state after segment->wait(), when
-        /// state is changed not manually via segment->complete(state) but from destructor of holder
-        /// and notify_all() is also called from destructor of holder.
-
-        std::optional<DB::FileSegmentsHolder> holder;
-        holder.emplace(cache.getOrSet(key, 3, 23, false)); /// Get [3, 25]
-
-        auto segments = fromHolder(*holder);
-        ASSERT_EQ(segments.size(), 3);
-
-        assertRange(38, segments[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
-
-        assertRange(39, segments[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::EMPTY);
-        ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-        ASSERT_TRUE(segments[1]->state() == DB::FileSegment::State::DOWNLOADING);
-
-        assertRange(40, segments[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
-
-        bool lets_start_download = false;
-        std::mutex mutex;
-        std::condition_variable cv;
-
-        std::thread other_1([&]
-        {
-            DB::ThreadStatus thread_status_1;
-            auto query_context_1 = DB::Context::createCopy(getContext().context);
-            query_context_1->makeQueryContext();
-            query_context_1->setCurrentQueryId("query_id_1");
-            DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);
-            thread_status_1.attachQueryContext(query_context_1);
-
-            auto holder_2 = cache.getOrSet(key, 3, 23, false); /// Get [3, 25] once again
-            auto segments_2 = fromHolder(*holder);
-            ASSERT_EQ(segments_2.size(), 3);
-
-            assertRange(41, segments_2[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
-            assertRange(42, segments_2[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::DOWNLOADING);
-            assertRange(43, segments_2[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
-
-            ASSERT_TRUE(segments_2[1]->getDownloader() != DB::FileSegment::getCallerId());
-            ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::DOWNLOADING);
-
-            {
-                std::lock_guard lock(mutex);
-                lets_start_download = true;
-            }
-            cv.notify_one();
-
-            segments_2[1]->wait();
-            printRanges(segments_2);
-            ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED);
-
-            ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
-            prepareAndDownload(segments_2[1]);
-            segments_2[1]->completeWithState(DB::FileSegment::State::DOWNLOADED);
-        });
-
-        {
-            std::unique_lock lock(mutex);
-            cv.wait(lock, [&]{ return lets_start_download; });
-        }
-
-        holder.reset();
-        other_1.join();
-        printRanges(segments);
-        ASSERT_TRUE(segments[1]->state() == DB::FileSegment::State::DOWNLOADED);
-    }
-
-    /// Current cache:    [___][        ][___][_][__]
-    ///                   ^   ^^         ^   ^^  ^  ^
-    ///                   2   45       24  2627 28 29
-
-    {
-        /// Test LRUCache::restore().
-
-        auto cache2 = DB::FileCache(cache_base_path, settings);
-        cache2.initialize();
-
-        auto holder1 = cache2.getOrSet(key, 2, 28, false); /// Get [2, 29]
-
-        auto segments1 = fromHolder(holder1);
-        ASSERT_EQ(segments1.size(), 5);
-
-        assertRange(44, segments1[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
-        assertRange(45, segments1[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::DOWNLOADED);
-        assertRange(45, segments1[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
-        assertRange(46, segments1[3], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
-        assertRange(47, segments1[4], DB::FileSegment::Range(28, 29), DB::FileSegment::State::DOWNLOADED);
-    }
-
-    {
-        /// Test max file segment size
-
-        auto settings2 = settings;
-        settings2.max_file_segment_size = 10;
-        auto cache2 = DB::FileCache(caches_dir / "cache2", settings2);
-        cache2.initialize();
-
-        auto holder1 = cache2.getOrSet(key, 0, 25, false); /// Get [0, 24]
-        auto segments1 = fromHolder(holder1);
-
-        ASSERT_EQ(segments1.size(), 3);
-        assertRange(48, segments1[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::EMPTY);
-        assertRange(49, segments1[1], DB::FileSegment::Range(10, 19), DB::FileSegment::State::EMPTY);
-        assertRange(50, segments1[2], DB::FileSegment::Range(20, 24), DB::FileSegment::State::EMPTY);
-    }
-
-}
--- a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp
@ -58,7 +58,7 @@ Fuzzing data consists of:
            else:
                read_key()
            if (7):
-                read_nonce (simillar to read_key)
+                read_nonce (similar to read_key)
            if (8):
                set current_key

--- a/src/Coordination/KeeperServer.cpp
+++ b/src/Coordination/KeeperServer.cpp
@ -370,6 +370,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo
    {
        auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, next_log_idx);

+        size_t preprocessed = 0;
        LOG_INFO(log, "Preprocessing {} log entries", log_entries->size());
        auto idx = state_machine->last_commit_index() + 1;
        for (const auto & entry : *log_entries)
@ -378,7 +379,12 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo
                state_machine->pre_commit(idx, entry->get_buf());

            ++idx;
+            ++preprocessed;
+
+            if (preprocessed % 50000 == 0)
+                LOG_TRACE(log, "Preprocessed {}/{} entries", preprocessed, log_entries->size());
        }
+        LOG_INFO(log, "Preprocessing done");
    }

    loadLatestConfig();
--- a/src/Coordination/KeeperSnapshotManager.h
+++ b/src/Coordination/KeeperSnapshotManager.h
@ -27,7 +27,7 @@ enum SnapshotVersion : uint8_t

 static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V5;

-/// What is stored in binary shapsnot
+/// What is stored in binary snapshot
 struct SnapshotDeserializationResult
 {
    /// Storage
--- a/src/Coordination/KeeperStorage.cpp
+++ b/src/Coordination/KeeperStorage.cpp
@ -369,8 +369,16 @@ void KeeperStorage::UncommittedState::addDeltas(std::vector<Delta> new_deltas)
        const auto & added_delta = deltas.emplace_back(std::move(delta));

        if (!added_delta.path.empty())
+        {
+            deltas_for_path[added_delta.path].push_back(&added_delta);
            applyDelta(added_delta);
        }
+        else if (const auto * auth_delta = std::get_if<AddAuthDelta>(&added_delta.operation))
+        {
+            auto & uncommitted_auth = session_and_auth[auth_delta->session_id];
+            uncommitted_auth.emplace_back(&auth_delta->auth_id);
+        }
+    }
 }

 void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
@ -385,6 +393,26 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
            break;
        }

+        auto & front_delta = deltas.front();
+
+        if (!front_delta.path.empty())
+        {
+            auto & path_deltas = deltas_for_path.at(front_delta.path);
+            assert(path_deltas.front() == &front_delta);
+            path_deltas.pop_front();
+            if (path_deltas.empty())
+                deltas_for_path.erase(front_delta.path);
+        }
+        else if (auto * add_auth = std::get_if<AddAuthDelta>(&front_delta.operation))
+        {
+            auto & uncommitted_auth = session_and_auth[add_auth->session_id];
+            assert(!uncommitted_auth.empty() && uncommitted_auth.front() == &add_auth->auth_id);
+            uncommitted_auth.pop_front();
+            if (uncommitted_auth.empty())
+                session_and_auth.erase(add_auth->session_id);
+
+        }
+
        deltas.pop_front();
    }

@ -405,10 +433,12 @@ void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid)
            deltas.back().zxid,
            rollback_zxid);

+    auto delta_it = deltas.rbegin();
+
    // we need to undo ephemeral mapping modifications
    // CreateNodeDelta added ephemeral for session id -> we need to remove it
    // RemoveNodeDelta removed ephemeral for session id -> we need to add it back
-    for (auto delta_it = deltas.rbegin(); delta_it != deltas.rend(); ++delta_it)
+    for (; delta_it != deltas.rend(); ++delta_it)
    {
        if (delta_it->zxid < rollback_zxid)
            break;
@ -431,29 +461,56 @@ void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid)
                    }
                },
                delta_it->operation);
+
+            auto & path_deltas = deltas_for_path.at(delta_it->path);
+            if (path_deltas.back() == &*delta_it)
+            {
+                path_deltas.pop_back();
+                if (path_deltas.empty())
+                    deltas_for_path.erase(delta_it->path);
+            }
+        }
+        else if (auto * add_auth = std::get_if<AddAuthDelta>(&delta_it->operation))
+        {
+            auto & uncommitted_auth = session_and_auth[add_auth->session_id];
+            if (uncommitted_auth.back() == &add_auth->auth_id)
+            {
+                uncommitted_auth.pop_back();
+                if (uncommitted_auth.empty())
+                    session_and_auth.erase(add_auth->session_id);
+            }
        }
    }

-    std::erase_if(deltas, [rollback_zxid](const auto & delta) { return delta.zxid == rollback_zxid; });
+    if (delta_it == deltas.rend())
+        deltas.clear();
+    else
+        deltas.erase(delta_it.base(), deltas.end());

-    std::unordered_set<std::string> deleted_nodes;
+    absl::flat_hash_set<std::string> deleted_nodes;
    std::erase_if(
        nodes,
        [&, rollback_zxid](const auto & node)
        {
            if (node.second.zxid == rollback_zxid)
            {
-                deleted_nodes.emplace(node.first);
+                deleted_nodes.emplace(std::move(node.first));
                return true;
            }
            return false;
        });

    // recalculate all the uncommitted deleted nodes
-    for (const auto & delta : deltas)
+    for (const auto & deleted_node : deleted_nodes)
    {
-        if (!delta.path.empty() && deleted_nodes.contains(delta.path))
-            applyDelta(delta);
+        auto path_delta_it = deltas_for_path.find(deleted_node);
+        if (path_delta_it != deltas_for_path.end())
+        {
+            for (const auto & delta : path_delta_it->second)
+            {
+                applyDelta(*delta);
+            }
+        }
    }
 }

@ -2135,7 +2192,7 @@ void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing)
    }
    catch (...)
    {
-        LOG_FATAL(&Poco::Logger::get("KeeperStorage"), "Failed to rollback log. Terminating to avoid incosistencies");
+        LOG_FATAL(&Poco::Logger::get("KeeperStorage"), "Failed to rollback log. Terminating to avoid inconsistencies");
        std::terminate();
    }
 }
--- a/src/Coordination/KeeperStorage.h
+++ b/src/Coordination/KeeperStorage.h
@ -229,27 +229,42 @@ public:

        bool hasACL(int64_t session_id, bool is_local, std::function<bool(const AuthID &)> predicate)
        {
-            for (const auto & session_auth : storage.session_and_auth[session_id])
+            /// Returns true if `predicate` holds for at least one auth id of `session_id`.
+            /// The auth ids in storage.session_and_auth are always checked; unless `is_local` is set,
+            /// the uncommitted ones tracked in this object's own session_and_auth are checked as well.
+
+            /// Runs `predicate` over a container of auth ids and stops at the first match.
+            const auto check_auth = [&](const auto & auth_ids)
            {
-                if (predicate(session_auth))
+                for (const auto & auth : auth_ids)
+                {
+                    /// The storage container yields AuthID objects while the uncommitted one yields
+                    /// `const AuthID *`, so normalize both to a pointer before calling the predicate.
+                    using TAuth = std::remove_reference_t<decltype(auth)>;
+
+                    const AuthID * auth_ptr = nullptr;
+                    if constexpr (std::is_pointer_v<TAuth>)
+                        auth_ptr = auth;
+                    else
+                        auth_ptr = &auth;
+
+                    if (predicate(*auth_ptr))
                        return true;
                }
+                return false;
+            };

            if (is_local)
-                return false;
+                return check_auth(storage.session_and_auth[session_id]);

-            for (const auto & delta : deltas)
-            {
-                if (const auto * auth_delta = std::get_if<KeeperStorage::AddAuthDelta>(&delta.operation);
-                    auth_delta && auth_delta->session_id == session_id && predicate(auth_delta->auth_id))
+            if (check_auth(storage.session_and_auth[session_id]))
                return true;
-            }

+            // check if there are uncommitted auth ids for this session
+            const auto auth_it = session_and_auth.find(session_id);
+            if (auth_it == session_and_auth.end())
                return false;
+
+            return check_auth(auth_it->second);
        }

        std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path) const;

+        std::unordered_map<int64_t, std::list<const AuthID *>> session_and_auth;
+
        struct UncommittedNode
        {
            std::shared_ptr<Node> node{nullptr};
@ -257,7 +272,32 @@ public:
            int64_t zxid{0};
        };

-        mutable std::unordered_map<std::string, UncommittedNode> nodes;
+        /// Hash functor for the string-keyed maps declared below (`nodes`, `deltas_for_path`).
+        /// Accepts a std::string_view, feeds it to SipHash and returns the result of get64().
+        struct Hash
+        {
+            auto operator()(const std::string_view view) const
+            {
+                SipHash hash;
+                hash.update(view);
+                return hash.get64();
+            }
+
+            using is_transparent = void; // required to make find() work with different type than key_type
+        };
+
+        /// Equality functor used together with Hash: compares two keys as std::string_view values.
+        struct Equal
+        {
+            auto operator()(const std::string_view a,
+                            const std::string_view b) const
+            {
+                return a == b;
+            }
+
+            using is_transparent = void; // required to make find() work with different type than key_type
+        };
+
+        mutable std::unordered_map<std::string, UncommittedNode, Hash, Equal> nodes;
+        std::unordered_map<std::string, std::list<const Delta *>, Hash, Equal> deltas_for_path;
+
        std::list<Delta> deltas;
        KeeperStorage & storage;
    };
--- a/src/Coordination/SessionExpiryQueue.h
+++ b/src/Coordination/SessionExpiryQueue.h
@ -53,7 +53,7 @@ public:
    /// Session was actually removed
    bool remove(int64_t session_id);

-    /// Update session expiry time (must be called on hearbeats)
+    /// Update session expiry time (must be called on heartbeats)
    void addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms);

    /// Get all expired sessions
--- a/Show More
+++ b/Show More