Merge branch 'master' into fix-mongo2-compose

2024-11-21 23:21:59 +00:00 · 2022-01-14 18:36:37 +00:00 · 2022-01-14 18:36:37 +00:00 · 4ffc3bb589
commit 4ffc3bb589
parent 917ddcd4af e2df17ef5c
223 changed files with 9201 additions and 2327 deletions
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@ -0,0 +1,8 @@
+self-hosted-runner:
+  labels:
+    - builder
+    - fuzzer-unit-tester
+    - stress-tester
+    - style-checker
+    - func-tester-aarch64
+    - func-tester
--- a/.github/workflows/backport.yml
+++ b/.github/workflows/backport.yml
@ -33,11 +33,11 @@ jobs:
      - name: Cherry pick
        run: |
          sudo pip install GitPython
-          cd $GITHUB_WORKSPACE/tests/ci
+          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 cherry_pick.py
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@ -9,23 +9,68 @@ on: # yamllint disable-line rule:truthy
    branches:
      - 'backport/**'
 jobs:
-  DockerHubPush:
-    runs-on: [self-hosted, style-checker]
+  DockerHubPushAarch64:
+    runs-on: [self-hosted, func-tester-aarch64]
    steps:
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
        run: |
-          cd $GITHUB_WORKSPACE/tests/ci
-          python3 docker_images_check.py
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+  DockerHubPushAmd64:
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix amd64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+  DockerHubPush:
+    needs: [DockerHubPushAmd64, DockerHubPushAarch64]
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Download changed aarch64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}
+      - name: Download changed amd64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/docker_images_check/changed_images.json
+          path: ${{ runner.temp }}/changed_images.json
  CompatibilityCheck:
    needs: [BuilderDebRelease]
    runs-on: [self-hosted, style-checker]
@ -39,7 +84,7 @@ jobs:
          EOF
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Download json reports
@ -48,16 +93,16 @@ jobs:
          path: ${{ env.REPORTS_PATH }}
      - name: CompatibilityCheck
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 compatibility_check.py
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
 #########################################################################################
 #################################### ORDINARY BUILDS ####################################
 #########################################################################################
@ -82,7 +127,7 @@ jobs:
          path: ${{ env.IMAGES_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
@ -90,10 +135,10 @@ jobs:
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
      - name: Upload build URLs to artifacts
        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
@ -103,9 +148,50 @@ jobs:
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  # Builds the aarch64 package on a self-hosted builder, using the merged
+  # changed_images artifact produced by DockerHubPush.
+  BuilderDebAarch64:
+    needs: [DockerHubPush]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_aarch64
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      # Start from an empty workspace, as the other builder jobs do, so files
+      # left on the self-hosted runner by an earlier job are not picked up.
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        # Run after a failed build too (but not after cancellation), matching
+        # the other builder jobs.
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_NAME }}
+          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          # Feed container IDs through xargs: one ID per argument, and nothing
+          # is run when the list is empty. A quoted "$(docker ps -q)" would
+          # pass every ID as a single newline-joined argument.
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderDebAsan:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
@ -127,7 +213,7 @@ jobs:
          path: ${{ env.IMAGES_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
@ -135,10 +221,10 @@ jobs:
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
      - name: Upload build URLs to artifacts
        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
@ -148,9 +234,9 @@ jobs:
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderDebTsan:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
@ -172,7 +258,7 @@ jobs:
          path: ${{ env.IMAGES_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
@ -180,10 +266,10 @@ jobs:
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
      - name: Upload build URLs to artifacts
        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
@ -193,9 +279,9 @@ jobs:
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderDebDebug:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
@ -217,7 +303,7 @@ jobs:
          path: ${{ env.IMAGES_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
@ -225,10 +311,10 @@ jobs:
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
      - name: Upload build URLs to artifacts
        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
@ -238,15 +324,16 @@ jobs:
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
 ############################################################################################
 ##################################### BUILD REPORTER #######################################
 ############################################################################################
  BuilderReport:
    needs:
      - BuilderDebRelease
+      - BuilderDebAarch64
      - BuilderDebAsan
      - BuilderDebTsan
      - BuilderDebDebug
@ -265,21 +352,21 @@ jobs:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Report Builder
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cd $GITHUB_WORKSPACE/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 build_report_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
 ##############################################################################################
 ########################### FUNCTIONAL STATELESS TESTS #######################################
 ##############################################################################################
@ -302,22 +389,22 @@ jobs:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
-          python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
 ##############################################################################################
 ############################ FUNCTIONAL STATEFUL TESTS #######################################
 ##############################################################################################
@ -340,22 +427,22 @@ jobs:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
-          python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
 ##############################################################################################
 ######################################### STRESS TESTS #######################################
 ##############################################################################################
@ -381,22 +468,22 @@ jobs:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Stress test
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
          python3 stress_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
 #############################################################################################
 ############################# INTEGRATION TESTS #############################################
 #############################################################################################
@ -418,22 +505,22 @@ jobs:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Integration test
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
          python3 integration_test_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
  FinishCheck:
    needs:
      - DockerHubPush
@ -447,10 +534,10 @@ jobs:
    steps:
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Finish label
        run: |
-          cd $GITHUB_WORKSPACE/tests/ci
+          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 finish_check.py
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@ -6,7 +6,7 @@ env:

 on: # yamllint disable-line rule:truthy
  workflow_run:
-    workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]
+    workflows: ["PullRequestCI", "ReleaseCI", "DocsCheck", "BackportPR"]
    types:
      - requested
 jobs:
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@ -21,31 +21,77 @@ jobs:
    steps:
      - name: Clear repository
        run: |
-          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Labels check
        run: |
-          cd $GITHUB_WORKSPACE/tests/ci
+          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 run_check.py
-  DockerHubPush:
+  DockerHubPushAarch64:
+    needs: CheckLabels
+    runs-on: [self-hosted, func-tester-aarch64]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+  DockerHubPushAmd64:
    needs: CheckLabels
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Clear repository
        run: |
-          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
        run: |
-          cd $GITHUB_WORKSPACE/tests/ci
-          python3 docker_images_check.py
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix amd64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+  DockerHubPush:
+    needs: [DockerHubPushAmd64, DockerHubPushAarch64]
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Download changed aarch64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}
+      - name: Download changed amd64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/docker_images_check/changed_images.json
+          path: ${{ runner.temp }}/changed_images.json
  DocsCheck:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
@ -63,17 +109,17 @@ jobs:
          path: ${{ env.TEMP_PATH }}
      - name: Clear repository
        run: |
-          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Docs Check
        run: |
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
          python3 docs_check.py
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
--- a/.github/workflows/jepsen.yml
+++ b/.github/workflows/jepsen.yml
@ -8,7 +8,7 @@ on: # yamllint disable-line rule:truthy
  schedule:
    - cron: '0 */6 * * *'
  workflow_run:
-    workflows: ["CIGithubActions"]
+    workflows: ["PullRequestCI"]
    types:
      - completed
  workflow_dispatch:
@ -24,21 +24,21 @@ jobs:
          EOF
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Jepsen Test
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
          python3 keeper_jepsen_check.py
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -19,23 +19,68 @@ on: # yamllint disable-line rule:truthy
      - '.github/**'
  workflow_dispatch:
 jobs:
-  DockerHubPush:
-    runs-on: [self-hosted, style-checker]
+  DockerHubPushAarch64:
+    runs-on: [self-hosted, func-tester-aarch64]
    steps:
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
        run: |
-          cd $GITHUB_WORKSPACE/tests/ci
-          python3 docker_images_check.py
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+  DockerHubPushAmd64:
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix amd64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+  DockerHubPush:
+    needs: [DockerHubPushAmd64, DockerHubPushAarch64]
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Download changed aarch64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}
+      - name: Download changed amd64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: changed_images
-          path: ${{ runner.temp }}/docker_images_check/changed_images.json
+          path: ${{ runner.temp }}/changed_images.json
  DocsRelease:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
@ -53,7 +98,7 @@ jobs:
          EOF
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Download changed images
@ -63,14 +108,14 @@ jobs:
          path: ${{ env.TEMP_PATH }}
      - name: Docs Release
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
          python3 docs_release.py
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
--- a/.github/workflows/woboq.yml
+++ b/.github/workflows/woboq.yml
@ -23,20 +23,20 @@ jobs:
          EOF
      - name: Clear repository
        run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
          submodules: 'true'
      - name: Codebrowser
        run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 codebrowser_check.py
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 codebrowser_check.py
      - name: Cleanup
        if: always()
        run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:  # one ID per arg; no-op if none
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
--- a/.gitmodules
+++ b/.gitmodules
@ -247,6 +247,9 @@
 [submodule "contrib/sysroot"]
 	path = contrib/sysroot
 	url = https://github.com/ClickHouse-Extras/sysroot.git
+[submodule "contrib/hive-metastore"]
+	path = contrib/hive-metastore
+	url = https://github.com/ClickHouse-Extras/hive-metastore
 [submodule "contrib/azure"]
 	path = contrib/azure
 	url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -511,6 +511,7 @@ include (cmake/find/h3.cmake)
 include (cmake/find/libxml2.cmake)
 include (cmake/find/brotli.cmake)
 include (cmake/find/protobuf.cmake)
+include (cmake/find/thrift.cmake)
 include (cmake/find/grpc.cmake)
 include (cmake/find/pdqsort.cmake)
 include (cmake/find/miniselect.cmake)
@ -520,7 +521,7 @@ include (cmake/find/curl.cmake)
 include (cmake/find/s3.cmake)
 include (cmake/find/blob_storage.cmake)
 include (cmake/find/base64.cmake)
-include (cmake/find/parquet.cmake)
+include (cmake/find/parquet.cmake) # uses protobuf
 include (cmake/find/simdjson.cmake)
 include (cmake/find/fast_float.cmake)
 include (cmake/find/rapidjson.cmake)
@ -548,6 +549,7 @@ include (cmake/find/cassandra.cmake)
 include (cmake/find/sentry.cmake)
 include (cmake/find/datasketches.cmake)
 include (cmake/find/libprotobuf-mutator.cmake)
+include (cmake/find/hive-metastore.cmake)

 set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "")
 find_contrib_lib(cityhash)
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@ -3,10 +3,10 @@
 # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
 SET(VERSION_REVISION 54458)
-SET(VERSION_MAJOR 21)
-SET(VERSION_MINOR 13)
+SET(VERSION_MAJOR 22)
+SET(VERSION_MINOR 1)
 SET(VERSION_PATCH 1)
 SET(VERSION_GITHASH 4cc45c1e15912ee300bca7cc8b8da2b888a70e2a)
-SET(VERSION_DESCRIBE v21.13.1.1-prestable)
-SET(VERSION_STRING 21.13.1.1)
+SET(VERSION_DESCRIBE v22.1.1.1-prestable)
+SET(VERSION_STRING 22.1.1.1)
 # end of autochange
--- a/cmake/find/hive-metastore.cmake
+++ b/cmake/find/hive-metastore.cmake
@ -0,0 +1,31 @@
+# Decides whether Hive support is built.
+# Sets USE_HIVE (left unset when ENABLE_HIVE is off) and, when Hive is usable,
+# HIVE_METASTORE_INCLUDE_DIR and HIVE_METASTORE_LIBRARY.
+option(ENABLE_HIVE "Enable Hive" ${ENABLE_LIBRARIES})
+
+if (NOT ENABLE_HIVE)
+    message(STATUS "Hive disabled")
+    return()
+endif()
+
+# Hive is enabled only if the metastore submodule is checked out and USE_THRIFT, USE_HDFS,
+# USE_ORC, USE_ARROW and USE_PARQUET are all truthy at the point this file is included.
+if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hive-metastore")
+    message(WARNING "submodule contrib/hive-metastore is missing. to fix try run: \n git submodule update --init")
+    set(USE_HIVE 0)
+elseif (NOT USE_THRIFT)
+    message(WARNING "Thrift is not found, which is needed by Hive")
+    set(USE_HIVE 0)
+elseif (NOT USE_HDFS)
+    message(WARNING "HDFS is not found, which is needed by Hive")
+    set(USE_HIVE 0)
+elseif (NOT USE_ORC OR NOT USE_ARROW OR NOT USE_PARQUET)
+    message(WARNING "ORC/Arrow/Parquet is not found, which are needed by Hive")
+    set(USE_HIVE 0)
+else()
+    set(USE_HIVE 1)
+    set(HIVE_METASTORE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore)
+    set(HIVE_METASTORE_LIBRARY hivemetastore)
+endif()
+
+message (STATUS "Using_Hive=${USE_HIVE}: ${HIVE_METASTORE_INCLUDE_DIR} : ${HIVE_METASTORE_LIBRARY}")
--- a/cmake/find/parquet.cmake
+++ b/cmake/find/parquet.cmake
@ -34,7 +34,6 @@ endif()
 if(NOT USE_INTERNAL_PARQUET_LIBRARY)
    find_package(Arrow)
    find_package(Parquet)
-    find_library(THRIFT_LIBRARY thrift)
    find_library(UTF8_PROC_LIBRARY utf8proc)
    find_package(BZip2)

@ -145,12 +144,10 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O
        set(FLATBUFFERS_LIBRARY flatbuffers)
        set(ARROW_LIBRARY arrow_static)
        set(PARQUET_LIBRARY parquet_static)
-        set(THRIFT_LIBRARY thrift_static)
    else()
        set(FLATBUFFERS_LIBRARY flatbuffers_shared)
        set(ARROW_LIBRARY arrow_shared)
        set(PARQUET_LIBRARY parquet_shared)
-        set(THRIFT_LIBRARY thrift)
    endif()

    set(USE_PARQUET 1)
--- a/cmake/find/thrift.cmake
+++ b/cmake/find/thrift.cmake
@ -0,0 +1,34 @@
+option(ENABLE_THRIFT "Enable Thrift" ${ENABLE_LIBRARIES})
+
+if (NOT ENABLE_THRIFT)
+    message (STATUS "thrift disabled")
+    set(USE_INTERNAL_THRIFT_LIBRARY 0)
+    return()
+endif()
+
+option(USE_INTERNAL_THRIFT_LIBRARY "Set to FALSE to use system thrift library instead of bundled" ON)
+if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/thrift")
+    if (USE_INTERNAL_THRIFT_LIBRARY)
+        message (WARNING "submodule contrib/thrift is missing. to fix try run: \n git submodule update --init --recursive")
+        set(USE_INTERNAL_THRIFT_LIBRARY 0)
+    endif ()
+endif()
+
+if (USE_INTERNAL_THRIFT_LIBRARY)
+    if (MAKE_STATIC_LIBRARIES)
+        set(THRIFT_LIBRARY thrift_static)
+    else()
+        set(THRIFT_LIBRARY thrift)
+    endif()
+    set (THRIFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src")
+    set(USE_THRIFT 1)
+else()
+    find_library(THRIFT_LIBRARY thrift)
+    if (NOT THRIFT_LIBRARY)
+        set(USE_THRIFT 0)
+    else()
+        set(USE_THRIFT 1)
+    endif()
+endif ()
+
+message (STATUS "Using thrift=${USE_THRIFT}: ${THRIFT_INCLUDE_DIR} : ${THRIFT_LIBRARY}")
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -150,7 +150,6 @@ if (USE_INTERNAL_PARQUET_LIBRARY)

    # The library is large - avoid bloat.
    target_compile_options (${ARROW_LIBRARY} PRIVATE -g0)
-    target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0)
    target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0)
 endif()

@ -206,6 +205,10 @@ if (USE_INTERNAL_PROTOBUF_LIBRARY)
    add_subdirectory(protobuf-cmake)
 endif ()

+if (USE_INTERNAL_THRIFT_LIBRARY)
+    add_subdirectory(thrift-cmake)
+endif ()
+
 if (USE_INTERNAL_HDFS3_LIBRARY)
    add_subdirectory(libhdfs3-cmake)
 endif ()
@ -299,6 +302,10 @@ if (USE_S2_GEOMETRY)
    add_subdirectory(s2geometry-cmake)
 endif()

+if (USE_HIVE)
+    add_subdirectory (hive-metastore-cmake)
+endif()
+
 # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
--- a/contrib/arrow
+++ b/contrib/arrow
@ -1 +1 @@
-Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e
+Subproject commit 1d9cc51daa4e7e9fc6926320ef73759818bd736e
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@ -17,57 +17,8 @@ else()
    set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0")
 endif()

-# === thrift
-
-set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp")
-# contrib/thrift/lib/cpp/CMakeLists.txt
-set(thriftcpp_SOURCES
-        "${LIBRARY_DIR}/src/thrift/TApplicationException.cpp"
-        "${LIBRARY_DIR}/src/thrift/TOutput.cpp"
-        "${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp"
-        "${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp"
-        "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h"
-        "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp"
-        "${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp"
-        "${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp"
-        "${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp"
-        "${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp"
-        "${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp"
-        "${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp"
-        "${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp"
-        "${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp"
-        "${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp"
-        "${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp"
-        "${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp"
-        "${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp"
-        "${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp"
-        "${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp"
-        "${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp"
-        )
-set(thriftcpp_threads_SOURCES
-        "${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp"
-        "${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp"
-        "${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp"
-        "${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp"
-        )
-add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
-set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641
-target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src")
-target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only)
-

 # === orc
-
 set(ORC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/orc/c++")
 set(ORC_INCLUDE_DIR "${ORC_SOURCE_DIR}/include")
 set(ORC_SOURCE_SRC_DIR "${ORC_SOURCE_DIR}/src")
@ -463,6 +414,7 @@ set(PARQUET_SRCS
 #list(TRANSFORM PARQUET_SRCS PREPEND "${LIBRARY_DIR}/") # cmake 3.12
 add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
 target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src" PRIVATE ${OPENSSL_INCLUDE_DIR})
+# include("${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake") # makes config.h

 set (HAVE_ARPA_INET_H 1)
 set (HAVE_FCNTL_H 1)
--- a/contrib/hive-metastore
+++ b/contrib/hive-metastore
@ -0,0 +1 @@
+Subproject commit 809a77d435ce218d9b000733f19489c606fc567b
--- a/contrib/hive-metastore-cmake/CMakeLists.txt
+++ b/contrib/hive-metastore-cmake/CMakeLists.txt
@ -0,0 +1,9 @@
+set (SRCS
+    ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_constants.cpp
+    ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_types.cpp
+    ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/ThriftHiveMetastore.cpp
+)
+
+add_library(${HIVE_METASTORE_LIBRARY} ${SRCS})
+target_link_libraries(${HIVE_METASTORE_LIBRARY} PUBLIC ${THRIFT_LIBRARY})
+target_include_directories(${HIVE_METASTORE_LIBRARY} SYSTEM PUBLIC ${HIVE_METASTORE_INCLUDE_DIR})
--- a/contrib/thrift-cmake/CMakeLists.txt
+++ b/contrib/thrift-cmake/CMakeLists.txt
@ -0,0 +1,47 @@
+# === thrift
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp")
+set(thriftcpp_SOURCES
+        "${LIBRARY_DIR}/src/thrift/TApplicationException.cpp"
+        "${LIBRARY_DIR}/src/thrift/TOutput.cpp"
+        "${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp"
+        "${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp"
+        "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h"
+        "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp"
+        "${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp"
+        "${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp"
+        "${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp"
+        "${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp"
+        "${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp"
+        "${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp"
+        "${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp"
+        "${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp"
+        "${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp"
+        "${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp"
+        "${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp"
+        "${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp"
+        "${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp"
+        "${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp"
+        "${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp"
+        )
+set(thriftcpp_threads_SOURCES
+        "${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp"
+        "${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp"
+        "${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp"
+        "${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp"
+        )
+
+include("${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake") # makes config.h
+
+add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
+target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC "${THRIFT_INCLUDE_DIR}" ${CMAKE_CURRENT_BINARY_DIR})
+target_link_libraries (${THRIFT_LIBRARY} PUBLIC boost::headers_only)
--- a/contrib/thrift-cmake/build/cmake/config.h.in
+++ b/contrib/thrift-cmake/build/cmake/config.h.in
@ -0,0 +1 @@
+../../../thrift/build/cmake/config.h.in
--- a/debian/changelog
+++ b/debian/changelog
@ -1,4 +1,4 @@
-clickhouse (21.13.1.1) unstable; urgency=low
+clickhouse (22.1.1.1) unstable; urgency=low

  * Modified source code

--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
-ARG version=21.13.1.*
+ARG version=22.1.1.*

 RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
--- a/docker/docs/builder/Dockerfile
+++ b/docker/docs/builder/Dockerfile
@ -1,3 +1,4 @@
+# rebuild in #33610
 # docker build -t clickhouse/docs-build .
 FROM ubuntu:20.04

--- a/docker/docs/check/Dockerfile
+++ b/docker/docs/check/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/docs-check .
-FROM clickhouse/docs-builder
+ARG FROM_TAG=latest
+FROM clickhouse/docs-builder:$FROM_TAG

 COPY run.sh /

--- a/docker/docs/release/Dockerfile
+++ b/docker/docs/release/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/docs-release .
-FROM clickhouse/docs-builder
+ARG FROM_TAG=latest
+FROM clickhouse/docs-builder:$FROM_TAG

 COPY run.sh /

--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -1,4 +1,5 @@
-#  docker build -t clickhouse/binary-builder .
+# rebuild in #33610
+# docker build -t clickhouse/binary-builder .
 FROM ubuntu:20.04

 # ARG for quick switch to a given ubuntu mirror
--- a/docker/packager/deb/Dockerfile
+++ b/docker/packager/deb/Dockerfile
@ -1,3 +1,4 @@
+# rebuild in #33610
 # docker build -t clickhouse/deb-builder .
 FROM ubuntu:20.04

@ -28,12 +29,14 @@ RUN apt-get update \
        software-properties-common \
        --yes --no-install-recommends

+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+
 # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
 # to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
 # Significantly increase deb packaging speed and compatible with old systems
-RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
-    && chmod +x dpkg-deb \
-    && cp dpkg-deb /usr/bin
+RUN curl -fLo /usr/bin/dpkg-deb "https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${TARGETARCH:-amd64}" \
+  && chmod +x /usr/bin/dpkg-deb

 RUN apt-get update \
    && apt-get install \
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
-ARG version=21.13.1.*
+ARG version=22.1.1.*
 ARG gosu_ver=1.10

 # set non-empty deb_location_url url to create a docker image
--- a/docker/test/Dockerfile
+++ b/docker/test/Dockerfile
@ -1,7 +1,7 @@
 FROM ubuntu:18.04

 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
-ARG version=21.13.1.*
+ARG version=22.1.1.*

 RUN apt-get update && \
    apt-get install -y apt-transport-https dirmngr && \
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/test-base .
-FROM clickhouse/test-util
+ARG FROM_TAG=latest
+FROM clickhouse/test-util:$FROM_TAG

 # ARG for quick switch to a given ubuntu mirror
 ARG apt_archive="http://archive.ubuntu.com"
@ -28,12 +30,14 @@ RUN apt-get update \
        software-properties-common \
        --yes --no-install-recommends

+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+
 # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
 # to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
 # Significantly increase deb packaging speed and compatible with old systems
-RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
-    && chmod +x dpkg-deb \
-    && cp dpkg-deb /usr/bin
+RUN curl -fLo /usr/bin/dpkg-deb "https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${TARGETARCH:-amd64}" \
+  && chmod +x /usr/bin/dpkg-deb

 RUN apt-get update \
    && apt-get install \
--- a/docker/test/codebrowser/Dockerfile
+++ b/docker/test/codebrowser/Dockerfile
@ -1,12 +1,14 @@
+# rebuild in #33610
 # docker build --network=host -t clickhouse/codebrowser .
 # docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser
-FROM clickhouse/binary-builder
+ARG FROM_TAG=latest
+FROM clickhouse/binary-builder:$FROM_TAG

 # ARG for quick switch to a given ubuntu mirror
 ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

-RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev
+RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev libmlir-13-dev

 # repo versions doesn't work correctly with C++17
 # also we push reports to s3, so we add index.html to subfolder urls
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 #  docker build -t clickhouse/fasttest .
-FROM clickhouse/test-util
+ARG FROM_TAG=latest
+FROM clickhouse/test-util:$FROM_TAG

 # ARG for quick switch to a given ubuntu mirror
 ARG apt_archive="http://archive.ubuntu.com"
@ -28,12 +30,14 @@ RUN apt-get update \
        software-properties-common \
        --yes --no-install-recommends

+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+
 # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
 # to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
 # Significantly increase deb packaging speed and compatible with old systems
-RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
-    && chmod +x dpkg-deb \
-    && cp dpkg-deb /usr/bin
+RUN curl -fLo /usr/bin/dpkg-deb "https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${TARGETARCH:-amd64}" \
+  && chmod +x /usr/bin/dpkg-deb

 RUN apt-get update \
    && apt-get install \
--- a/docker/test/fuzzer/Dockerfile
+++ b/docker/test/fuzzer/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/fuzzer .
-FROM clickhouse/test-base
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG

 # ARG for quick switch to a given ubuntu mirror
 ARG apt_archive="http://archive.ubuntu.com"
--- a/docker/test/integration/base/Dockerfile
+++ b/docker/test/integration/base/Dockerfile
@ -1,44 +1,57 @@
+# rebuild in #33610
 # docker build -t clickhouse/integration-test .
-FROM clickhouse/test-base
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG

 SHELL ["/bin/bash", "-c"]

 RUN apt-get update \
    && env DEBIAN_FRONTEND=noninteractive apt-get -y install \
-        tzdata \
-        python3 \
-        libicu-dev \
        bsdutils \
+        curl \
+        default-jre \
+        g++ \
        gdb \
-        unixodbc \
-        odbcinst \
+        iproute2 \
+        krb5-user \
+        libicu-dev \
        libsqlite3-dev \
        libsqliteodbc \
-        odbc-postgresql \
-        sqlite3 \
-        curl \
-        tar \
-        lz4 \
-        krb5-user \
-        iproute2 \
        lsof \
-        g++ \
-        default-jre
+        lz4 \
+        odbc-postgresql \
+        odbcinst \
+        python3 \
+        rpm2cpio \
+        sqlite3 \
+        tar \
+        tzdata \
+        unixodbc \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

-RUN rm -rf \
-        /var/lib/apt/lists/* \
-        /var/cache/debconf \
-        /tmp/* \
-RUN apt-get clean
+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH

-# Install MySQL ODBC driver
-RUN curl 'https://downloads.mysql.com/archives/get/p/10/file/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --location --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
+# Install MySQL ODBC driver from RHEL rpm
+RUN arch=${TARGETARCH:-amd64} \
+  && case $arch in \
+      amd64) rarch=x86_64 ;; \
+      arm64) rarch=aarch64 ;; \
+    esac \
+  && cd /tmp \
+  && curl -o mysql-odbc.rpm "https://cdn.mysql.com/Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \
+  && rpm2archive mysql-odbc.rpm \
+  && tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \
+  && LINK_DIR=$(dpkg -L libodbc1 | grep '^/usr/lib/.*-linux-gnu/odbc$') \
+  && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR" \
+  && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR"/libmyodbc.so

 # Unfortunately this is required for a single test for conversion data from zookeeper to clickhouse-keeper.
 # ZooKeeper is not started by default, but consumes some space in containers.
 # 777 perms used to allow anybody to start/stop ZooKeeper
 ENV ZOOKEEPER_VERSION='3.6.3'
-RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
+RUN curl -O "https://dlcdn.apache.org/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
 RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
 RUN echo $'tickTime=2500 \n\
 tickTime=2500 \n\
--- a/docker/test/integration/hive_server/Dockerfile
+++ b/docker/test/integration/hive_server/Dockerfile
@ -0,0 +1,53 @@
+# Image with Hadoop 3.1.0, Hive 2.3.9 and a MySQL server, used by the Hive integration tests.
+FROM ubuntu:20.04
+LABEL maintainer="lgbo-ustc <lgbo.ustc@gmail.com>"
+
+# Refresh the package index and install in the same layer, so a cached stale index is never
+# reused by a later install; the noninteractive frontend keeps package configuration from prompting.
+RUN apt-get update \
+    && env DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        iputils-ping \
+        mysql-server \
+        net-tools \
+        openjdk-8-jre \
+        openssh-client \
+        openssh-server \
+        vim \
+        wget
+
+# Both tarballs come from archive.apache.org, which keeps releases that are no longer current.
+RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \
+        tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz
+RUN wget https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \
+        tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz
+
+# Create a passphrase-less root key pair, authorize it for root and reuse it as the sshd host key.
+RUN mkdir -p /root/.ssh && \
+        ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa && \
+        cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys && \
+        cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key && \
+        cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub
+
+# Drop the MySQL JDBC driver into Hive's lib directory.
+RUN wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz && \
+        tar -xf mysql-connector-java-8.0.27.tar.gz && \
+        mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ && \
+        rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27
+
+ENV JAVA_HOME=/usr
+ENV HADOOP_HOME=/hadoop-3.1.0
+ENV HDFS_NAMENODE_USER=root
+ENV HDFS_DATANODE_USER=root HDFS_SECONDARYNAMENODE_USER=root YARN_RESOURCEMANAGER_USER=root YARN_NODEMANAGER_USER=root HDFS_DATANODE_SECURE_USER=hdfs
+COPY hdfs-site.xml /hadoop-3.1.0/etc/hadoop
+COPY mapred-site.xml /hadoop-3.1.0/etc/hadoop
+COPY yarn-site.xml /hadoop-3.1.0/etc/hadoop
+COPY hadoop-env.sh /hadoop-3.1.0/etc/hadoop/
+# Only the template is shipped (it contains a HOSTNAME placeholder); presumably core-site.xml is rendered from it at container start - TODO confirm.
+COPY core-site.xml.template /hadoop-3.1.0/etc/hadoop
+COPY hive-site.xml /apache-hive-2.3.9-bin/conf
+COPY prepare_hive_data.sh /
+COPY demo_data.txt /
+
+ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH
+
+COPY start.sh /
--- a/docker/test/integration/hive_server/core-site.xml.template
+++ b/docker/test/integration/hive_server/core-site.xml.template
@ -0,0 +1,14 @@
+  <configuration>
+      <property>
+          <name>fs.defaultFS</name>
+          <value>hdfs://HOSTNAME:9000</value>
+      </property>
+      <property>
+          <name>hadoop.proxyuser.root.hosts</name>
+          <value>*</value>
+      </property>
+      <property>
+          <name>hadoop.proxyuser.root.groups</name>
+          <value>*</value>
+      </property>
+  </configuration>
--- a/docker/test/integration/hive_server/demo_data.txt
+++ b/docker/test/integration/hive_server/demo_data.txt
@ -0,0 +1,6 @@
+abc,1,2021-11-16
+abd,15,2021-11-05
+aaa,22,2021-11-16
+dda,0,2021-11-01
+dfb,11,2021-11-05
+jhn,89,2021-11-11
--- a/docker/test/integration/hive_server/hadoop-env.sh
+++ b/docker/test/integration/hive_server/hadoop-env.sh
@ -0,0 +1,422 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Set Hadoop-specific environment variables here.
+
+##
+## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS.
+## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS.  THEREFORE,
+## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE
+## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh.
+##
+## Precedence rules:
+##
+## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults
+##
+## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults
+##
+
+# Many of the options here are built from the perspective that users
+# may want to provide OVERWRITING values on the command line.
+# For example:
+#
+JAVA_HOME=/usr/
+#
+# Therefore, the vast majority (BUT NOT ALL!) of these defaults
+# are configured for substitution and not append.  If append
+# is preferable, modify this file accordingly.
+
+###
+# Generic settings for HADOOP
+###
+
+# Technically, the only required environment variable is JAVA_HOME.
+# All others are optional.  However, the defaults are probably not
+# preferred.  Many sites configure these options outside of Hadoop,
+# such as in /etc/profile.d
+
+# The java implementation to use. By default, this environment
+# variable is REQUIRED on ALL platforms except OS X!
+# export JAVA_HOME=
+
+# Location of Hadoop.  By default, Hadoop will attempt to determine
+# this location based upon its execution path.
+# export HADOOP_HOME=
+
+# Location of Hadoop's configuration information.  i.e., where this
+# file is living. If this is not defined, Hadoop will attempt to
+# locate it based upon its execution path.
+#
+# NOTE: It is recommended that this variable not be set here but in
+# /etc/profile.d or equivalent.  Some options (such as
+# --config) may react strangely otherwise.
+#
+# export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
+
+# The maximum amount of heap to use (Java -Xmx).  If no unit
+# is provided, it will be converted to MB.  Daemons will
+# prefer any Xmx setting in their respective _OPT variable.
+# There is no default; the JVM will autoscale based upon machine
+# memory size.
+# export HADOOP_HEAPSIZE_MAX=
+
+# The minimum amount of heap to use (Java -Xms).  If no unit
+# is provided, it will be converted to MB.  Daemons will
+# prefer any Xms setting in their respective _OPT variable.
+# There is no default; the JVM will autoscale based upon machine
+# memory size.
+# export HADOOP_HEAPSIZE_MIN=
+
+# Enable extra debugging of Hadoop's JAAS binding, used to set up
+# Kerberos security.
+# export HADOOP_JAAS_DEBUG=true
+
+# Extra Java runtime options for all Hadoop commands. We don't support
+# IPv6 yet/still, so by default the preference is set to IPv4.
+# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
+# For Kerberos debugging, an extended option set logs more information
+# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug"
+
+# Some parts of the shell code may do special things dependent upon
+# the operating system.  We have to set this here. See the next
+# section as to why....
+export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
+
+
+# Under certain conditions, Java on OS X will throw SCDynamicStore errors
+# in the system logs.
+# See HADOOP-8719 for more information.  If one needs Kerberos
+# support on OS X, one will want to change/remove this extra bit.
+case ${HADOOP_OS_TYPE} in
+  Darwin*)
+    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
+    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= "
+    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= "
+  ;;
+esac
+
+# Extra Java runtime options for some Hadoop commands
+# and clients (i.e., hdfs dfs -blah).  These get appended to HADOOP_OPTS for
+# such commands.  In most cases, this should be left empty and
+# let users supply it on the command line.
+# export HADOOP_CLIENT_OPTS=""
+
+#
+# A note about classpaths.
+#
+# By default, Apache Hadoop overrides Java's CLASSPATH
+# environment variable.  It is configured such
+# that it starts out blank with new entries added after passing
+# a series of checks (file/dir exists, not already listed aka
+# de-duplication).  During de-duplication, wildcards and/or
+# directories are *NOT* expanded to keep it simple. Therefore,
+# if the computed classpath has two specific mentions of
+# awesome-methods-1.0.jar, only the first one added will be seen.
+# If two directories are in the classpath that both contain
+# awesome-methods-1.0.jar, then Java will pick up both versions.
+
+# An additional, custom CLASSPATH. Site-wide configs should be
+# handled via the shellprofile functionality, utilizing the
+# hadoop_add_classpath function for greater control and much
+# harder for apps/end-users to accidentally override.
+# Similarly, end users should utilize ${HOME}/.hadooprc .
+# This variable should ideally only be used as a short-cut,
+# interactive way for temporary additions on the command line.
+# export HADOOP_CLASSPATH="/some/cool/path/on/your/machine"
+
+# Should HADOOP_CLASSPATH be first in the official CLASSPATH?
+# export HADOOP_USER_CLASSPATH_FIRST="yes"
+
+# If HADOOP_USE_CLIENT_CLASSLOADER is set, the classpath along
+# with the main jar are handled by a separate isolated
+# client classloader when 'hadoop jar', 'yarn jar', or 'mapred job'
+# is utilized. If it is set, HADOOP_CLASSPATH and
+# HADOOP_USER_CLASSPATH_FIRST are ignored.
+# export HADOOP_USE_CLIENT_CLASSLOADER=true
+
+# HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES overrides the default definition of
+# system classes for the client classloader when HADOOP_USE_CLIENT_CLASSLOADER
+# is enabled. Names ending in '.' (period) are treated as package names, and
+# names starting with a '-' are treated as negative matches. For example,
+# export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop."
+
+# Enable optional, bundled Hadoop features
+# This is a comma delimited list.  It may NOT be overridden via .hadooprc
+# Entries may be added/removed as needed.
+# export HADOOP_OPTIONAL_TOOLS="hadoop-openstack,hadoop-aliyun,hadoop-azure,hadoop-azure-datalake,hadoop-aws,hadoop-kafka"
+
+###
+# Options for remote shell connectivity
+###
+
+# There are some optional components of hadoop that allow for
+# command and control of remote hosts.  For example,
+# start-dfs.sh will attempt to bring up all NNs, DNs, etc.
+
+# Options to pass to SSH when one of the "log into a host and
+# start/stop daemons" scripts is executed
+# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
+
+# The built-in ssh handler will limit itself to 10 simultaneous connections.
+# For pdsh users, this sets the fanout size ( -f )
+# Change this to increase/decrease as necessary.
+# export HADOOP_SSH_PARALLEL=10
+
+# Filename which contains all of the hosts for any remote execution
+# helper scripts such as workers.sh, start-dfs.sh, etc.
+# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers"
+
+###
+# Options for all daemons
+###
+#
+
+#
+# Many options may also be specified as Java properties.  It is
+# very common, and in many cases, desirable, to hard-set these
+# in daemon _OPTS variables.  Where applicable, the appropriate
+# Java property is also identified.  Note that many are re-used
+# or set differently in certain contexts (e.g., secure vs
+# non-secure)
+#
+
+# Where (primarily) daemon log files are stored.
+# ${HADOOP_HOME}/logs by default.
+# Java property: hadoop.log.dir
+# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+# A string representing this instance of hadoop. $USER by default.
+# This is used in writing log and pid files, so keep that in mind!
+# Java property: hadoop.id.str
+# export HADOOP_IDENT_STRING=$USER
+
+# How many seconds to pause after stopping a daemon
+# export HADOOP_STOP_TIMEOUT=5
+
+# Where pid files are stored.  /tmp by default.
+# export HADOOP_PID_DIR=/tmp
+
+# Default log4j setting for interactive commands
+# Java property: hadoop.root.logger
+# export HADOOP_ROOT_LOGGER=INFO,console
+
+# Default log4j setting for daemons spawned explicitly by
+# --daemon option of hadoop, hdfs, mapred and yarn command.
+# Java property: hadoop.root.logger
+# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
+
+# Default log level and output location for security-related messages.
+# You will almost certainly want to change this on a per-daemon basis via
+# the Java property (i.e., -Dhadoop.security.logger=foo). (Note that the
+# defaults for the NN and 2NN override this by default.)
+# Java property: hadoop.security.logger
+# export HADOOP_SECURITY_LOGGER=INFO,NullAppender
+
+# Default process priority level
+# Note that sub-processes will also run at this level!
+# export HADOOP_NICENESS=0
+
+# Default name for the service level authorization file
+# Java property: hadoop.policy.file
+# export HADOOP_POLICYFILE="hadoop-policy.xml"
+
+#
+# NOTE: this is not used by default!  <-----
+# You can define variables right here and then re-use them later on.
+# For example, it is common to use the same garbage collection settings
+# for all the daemons.  So one could define:
+#
+# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
+#
+# .. and then use it as per the b option under the namenode.
+
+###
+# Secure/privileged execution
+###
+
+#
+# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
+# on privileged ports.  This functionality can be replaced by providing
+# custom functions.  See hadoop-functions.sh for more information.
+#
+
+# The jsvc implementation to use. Jsvc is required to run secure datanodes
+# that bind to privileged ports to provide authentication of data transfer
+# protocol.  Jsvc is not required if SASL is configured for authentication of
+# data transfer protocol using non-privileged ports.
+# export JSVC_HOME=/usr/bin
+
+#
+# This directory contains pids for secure and privileged processes.
+#export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}
+
+#
+# This directory contains the logs for secure and privileged processes.
+# Java property: hadoop.log.dir
+# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR}
+
+#
+# When running a secure daemon, the default value of HADOOP_IDENT_STRING
+# ends up being a bit bogus.  Therefore, by default, the code will
+# replace HADOOP_IDENT_STRING with HADOOP_xx_SECURE_USER.  If one wants
+# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
+# export HADOOP_SECURE_IDENT_PRESERVE="true"
+
+###
+# NameNode specific parameters
+###
+
+# Default log level and output location for file system related change
+# messages. For non-namenode daemons, the Java property must be set in
+# the appropriate _OPTS if one wants something other than INFO,NullAppender
+# Java property: hdfs.audit.logger
+# export HDFS_AUDIT_LOGGER=INFO,NullAppender
+
+# Specify the JVM options to be used when starting the NameNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# a) Set JMX options
+# export HDFS_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
+#
+# b) Set garbage collection logs
+# export HDFS_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+#
+# c) ... or set them directly
+# export HDFS_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+
+# this is the default:
+# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
+
+###
+# SecondaryNameNode specific parameters
+###
+# Specify the JVM options to be used when starting the SecondaryNameNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# This is the default:
+# export HDFS_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
+
+###
+# DataNode specific parameters
+###
+# Specify the JVM options to be used when starting the DataNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# This is the default:
+# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
+
+# On secure datanodes, user to run the datanode as after dropping privileges.
+# This **MUST** be uncommented to enable secure HDFS if using privileged ports
+# to provide authentication of data transfer protocol.  This **MUST NOT** be
+# defined if SASL is configured for authentication of data transfer protocol
+# using non-privileged ports.
+# This will replace the hadoop.id.str Java property in secure mode.
+# export HDFS_DATANODE_SECURE_USER=hdfs
+
+# Supplemental options for secure datanodes
+# By default, Hadoop uses jsvc which needs to know to launch a
+# server jvm.
+# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server"
+
+###
+# NFS3 Gateway specific parameters
+###
+# Specify the JVM options to be used when starting the NFS3 Gateway.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_NFS3_OPTS=""
+
+# Specify the JVM options to be used when starting the Hadoop portmapper.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_PORTMAP_OPTS="-Xmx512m"
+
+# Supplemental options for privileged gateways
+# By default, Hadoop uses jsvc which needs to know to launch a
+# server jvm.
+# export HDFS_NFS3_SECURE_EXTRA_OPTS="-jvm server"
+
+# On privileged gateways, user to run the gateway as after dropping privileges
+# This will replace the hadoop.id.str Java property in secure mode.
+# export HDFS_NFS3_SECURE_USER=nfsserver
+
+###
+# ZKFailoverController specific parameters
+###
+# Specify the JVM options to be used when starting the ZKFailoverController.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_ZKFC_OPTS=""
+
+###
+# QuorumJournalNode specific parameters
+###
+# Specify the JVM options to be used when starting the QuorumJournalNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_JOURNALNODE_OPTS=""
+
+###
+# HDFS Balancer specific parameters
+###
+# Specify the JVM options to be used when starting the HDFS Balancer.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_BALANCER_OPTS=""
+
+###
+# HDFS Mover specific parameters
+###
+# Specify the JVM options to be used when starting the HDFS Mover.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_MOVER_OPTS=""
+
+###
+# Router-based HDFS Federation specific parameters
+# Specify the JVM options to be used when starting the RBF Routers.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_DFSROUTER_OPTS=""
+###
+
+###
+# Advanced Users Only!
+###
+
+#
+# When building Hadoop, one can add the class paths to the commands
+# via this special env var:
+# export HADOOP_ENABLE_BUILD_PATHS="true"
+
+#
+# To prevent accidents, shell commands may be (superficially) locked
+# to only allow certain users to execute certain subcommands.
+# It uses the format of (command)_(subcommand)_USER.
+#
+# For example, to limit who can execute the namenode command,
+# export HDFS_NAMENODE_USER=hdfs
--- a/docker/test/integration/hive_server/hdfs-site.xml
+++ b/docker/test/integration/hive_server/hdfs-site.xml
@ -0,0 +1,6 @@
+<configuration>
+    <property>
+        <name>dfs.replication</name>
+        <value>1</value>
+    </property>
+</configuration>
--- a/docker/test/integration/hive_server/hive-site.xml
+++ b/docker/test/integration/hive_server/hive-site.xml
@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<configuration>
+    <property>
+        <name>javax.jdo.option.ConnectionURL</name>
+        <value>jdbc:mysql://localhost/hcatalog?createDatabaseIfNotExist=true</value>
+    </property>
+    <property>
+        <name>javax.jdo.option.ConnectionUserName</name>
+        <value>test</value>
+    </property>
+    <property>
+        <name>javax.jdo.option.ConnectionPassword</name>
+        <value>test</value>
+    </property>
+    <property>
+        <name>javax.jdo.option.ConnectionDriverName</name>
+        <value>com.mysql.cj.jdbc.Driver</value><!-- Connector/J 8.x class name; the legacy com.mysql.jdbc.Driver is deprecated -->
+    </property>
+</configuration>
--- a/docker/test/integration/hive_server/mapred-site.xml
+++ b/docker/test/integration/hive_server/mapred-site.xml
@ -0,0 +1,6 @@
+<configuration>
+    <property>
+        <name>mapreduce.framework.name</name>
+        <value>yarn</value>
+    </property>
+</configuration>
--- a/docker/test/integration/hive_server/prepare_hive_data.sh
+++ b/docker/test/integration/hive_server/prepare_hive_data.sh
@ -0,0 +1,10 @@
+#!/bin/bash
+# Creates the `test` Hive database and fills its demo tables from /demo_data.txt.
+hive -e "create database test"
+
+# Partitioned target tables: test.demo (Parquet) and test.demo_orc (ORC).
+hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE   'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'  STORED AS INPUTFORMAT  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE   'org.apache.hadoop.hive.ql.io.orc.OrcSerde'  STORED AS INPUTFORMAT  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
+# Comma-delimited staging table, loaded from the local demo file.
+hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
+# Copy the staging rows into both partitioned tables using dynamic partitioning on `day`.
+hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
--- a/docker/test/integration/hive_server/start.sh
+++ b/docker/test/integration/hive_server/start.sh
@ -0,0 +1,18 @@
+# Container entrypoint (docker_compose_hive.yml runs it as `bash /start.sh`).
+# Starts ssh, formats and starts Hadoop, starts MySQL, initialises the Hive
+# metastore schema, loads the demo data and then blocks so the script never exits.
+service ssh start
+# Render core-site.xml from its template, substituting this container's hostname.
+sed "s/HOSTNAME/$HOSTNAME/" /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml
+hadoop namenode -format
+start-all.sh
+service mysql start
+# MySQL account matching the test/test credentials configured in hive-site.xml.
+mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\""
+mysql -u root -e "GRANT ALL  ON * . * TO 'test'@'localhost'"
+schematool -initSchema -dbType mysql
+#nohup hiveserver2 &
+nohup hive --service metastore &
+bash /prepare_hive_data.sh
+# Block forever; the background services started above keep running.
+while true; do sleep 1000; done
--- a/docker/test/integration/hive_server/yarn-site.xml
+++ b/docker/test/integration/hive_server/yarn-site.xml
@ -0,0 +1,32 @@
+<configuration>
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value>mapreduce_shuffle</value>
+    </property>
+
+    <property>
+      <name>yarn.application.classpath</name>
+      <value>/hadoop-3.1.0/etc/hadoop,/hadoop-3.1.0/share/hadoop/common/*,/hadoop-3.1.0/share/hadoop/common/lib/*,/hadoop-3.1.0/share/hadoop/hdfs/*, /hadoop-3.1.0/share/hadoop/hdfs/lib/*, /hadoop-3.1.0/share/hadoop/mapreduce/*, /hadoop-3.1.0/share/hadoop/mapreduce/lib/*, /hadoop-3.1.0/share/hadoop/yarn/*, /hadoop-3.1.0/share/hadoop/yarn/lib/*</value>
+    </property>
+
+    <property>
+    <description>
+      Number of seconds after an application finishes before the nodemanager's
+      DeletionService will delete the application's localized file directory
+      and log directory.
+
+      To diagnose Yarn application problems, set this property's value large
+      enough (for example, to 600 = 10 minutes) to permit examination of these
+      directories. After changing the property's value, you must restart the
+      nodemanager in order for it to have an effect.
+
+      The roots of Yarn applications' work directories is configurable with
+      the yarn.nodemanager.local-dirs property (see below), and the roots
+      of the Yarn applications' log directories is configurable with the
+      yarn.nodemanager.log-dirs property (see also below).
+    </description>
+    <name>yarn.nodemanager.delete.debug-delay-sec</name>
+    <value>600</value>
+  </property>
+
+</configuration>
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@ -45,7 +45,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 ENV DOCKER_CHANNEL stable
 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
-RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
+RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"

 RUN apt-get update \
    && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
@ -58,7 +58,9 @@ RUN apt-get update \

 RUN dockerd --version; docker --version

-RUN python3 -m pip install \
+ARG TARGETARCH
+# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
+RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
    PyMySQL \
    aerospike==4.0.0 \
    avro==1.10.2 \
@ -88,7 +90,7 @@ RUN python3 -m pip install \
    urllib3 \
    requests-kerberos \
    pyhdfs \
-    azure-storage-blob
+    azure-storage-blob )

 COPY modprobe.sh /usr/local/bin/modprobe
 COPY dockerd-entrypoint.sh /usr/local/bin/
@ -102,8 +104,6 @@ RUN set -x \
  && echo 'dockremap:165536:65536' >> /etc/subuid \
    && echo 'dockremap:165536:65536' >> /etc/subgid

-RUN echo '127.0.0.1 localhost test.com' >> /etc/hosts
-
 EXPOSE 2375
 ENTRYPOINT ["dockerd-entrypoint.sh"]
 CMD ["sh", "-c", "pytest $PYTEST_OPTS"]
--- a/docker/test/integration/runner/compose/docker_compose_hive.yml
+++ b/docker/test/integration/runner/compose/docker_compose_hive.yml
@ -0,0 +1,10 @@
+# Hive test service for the integration runner.
+# NOTE(review): the image is presumably built from docker/test/integration/hive_server - confirm.
+version: '2.3'
+services:
+    hdfs1:
+        image: lgboustc/hive_test:v1.0
+        hostname: hivetest
+        restart: always
+        # Exec-form equivalent of `bash /start.sh`.
+        entrypoint: ["bash", "/start.sh"]
--- a/docker/test/keeper-jepsen/Dockerfile
+++ b/docker/test/keeper-jepsen/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/keeper-jepsen-test .
-FROM clickhouse/test-base
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG

 ENV DEBIAN_FRONTEND=noninteractive
 ENV CLOJURE_VERSION=1.10.3.814
--- a/docker/test/pvs/Dockerfile
+++ b/docker/test/pvs/Dockerfile
@ -1,8 +1,14 @@
+# rebuild in #33610
 # docker build -t clickhouse/pvs-test .

-FROM clickhouse/binary-builder
+ARG FROM_TAG=latest
+FROM clickhouse/binary-builder:$FROM_TAG

-RUN apt-get update --yes \
+# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere
+# We'll produce an empty image for arm64
+ARG TARGETARCH
+
+RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \
    && apt-get install \
        bash \
        wget \
@ -15,7 +21,7 @@ RUN apt-get update --yes \
        libprotoc-dev \
        libgrpc++-dev \
        libc-ares-dev \
-        --yes --no-install-recommends
+        --yes --no-install-recommends )

 #RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
 #RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list
@ -27,7 +33,7 @@ RUN apt-get update --yes \

 ENV PKG_VERSION="pvs-studio-latest"

-RUN set -x \
+RUN test x$TARGETARCH = xarm64 || ( set -x \
    && export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \
    && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
    && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
@ -35,7 +41,7 @@ RUN set -x \
    && wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \
    && { debsig-verify ${PKG_VERSION}.deb \
    || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \
-    && dpkg -i "${PKG_VERSION}.deb"
+    && dpkg -i "${PKG_VERSION}.deb" )

 ENV CCACHE_DIR=/test_output/ccache

--- a/docker/test/split_build_smoke_test/Dockerfile
+++ b/docker/test/split_build_smoke_test/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/split-build-smoke-test .
-FROM clickhouse/binary-builder
+ARG FROM_TAG=latest
+FROM clickhouse/binary-builder:$FROM_TAG

 COPY run.sh /run.sh
 COPY process_split_build_smoke_test_result.py /
--- a/docker/test/stateful/Dockerfile
+++ b/docker/test/stateful/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/stateful-test .
-FROM clickhouse/stateless-test
+ARG FROM_TAG=latest
+FROM clickhouse/stateless-test:$FROM_TAG

 RUN apt-get update -y \
    && env DEBIAN_FRONTEND=noninteractive \
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -1,11 +1,10 @@
+# rebuild in #33610
 # docker build -t clickhouse/stateless-test .
-FROM clickhouse/test-base
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG

 ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"

-RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \
-    && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5
-
 RUN apt-get update -y \
    && env DEBIAN_FRONTEND=noninteractive \
        apt-get install --yes --no-install-recommends \
@ -30,7 +29,7 @@ RUN apt-get update -y \
            tree \
            unixodbc \
            wget \
-            mysql-client=5.7* \
+            mysql-client=8.0* \
            postgresql-client \
            sqlite3

@ -49,10 +48,13 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 ENV NUM_TRIES=1
 ENV MAX_RUN_TIME=0

+ARG TARGETARCH
+
 # Download Minio-related binaries
-RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \
+RUN arch=${TARGETARCH:-amd64} \
+    && wget "https://dl.min.io/server/minio/release/linux-${arch}/minio" \
    && chmod +x ./minio \
-    && wget 'https://dl.min.io/client/mc/release/linux-amd64/mc' \
+    && wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \
    && chmod +x ./mc

 ENV MINIO_ROOT_USER="clickhouse"
--- a/docker/test/stateless_pytest/Dockerfile
+++ b/docker/test/stateless_pytest/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/stateless-pytest .
-FROM clickhouse/test-base
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG

 RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
--- a/docker/test/stress/Dockerfile
+++ b/docker/test/stress/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/stress-test .
-FROM clickhouse/stateful-test
+ARG FROM_TAG=latest
+FROM clickhouse/stateful-test:$FROM_TAG

 RUN apt-get update -y \
    && env DEBIAN_FRONTEND=noninteractive \
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@ -146,6 +146,7 @@ handle SIGUSR2 nostop noprint pass
 handle SIG$RTMIN nostop noprint pass
 info signals
 continue
+gcore
 backtrace full
 info locals
 info registers
@ -263,3 +264,10 @@ done
 # Write check result into check_status.tsv
 clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
 [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
+
+# Core dumps (see gcore)
+# Default filename is 'core.PROCESS_ID'
+for core in core.*; do
+    [ -e "$core" ] || continue  # unmatched glob stays literal 'core.*': nothing to collect
+    pigz "$core" && mv "$core.gz" /output/
+done
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@ -1,19 +1,41 @@
 # docker build -t clickhouse/style-test .
 FROM ubuntu:20.04
+ARG ACT_VERSION=0.2.25
+ARG ACTIONLINT_VERSION=1.6.8

 # ARG for quick switch to a given ubuntu mirror
 ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

 RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
-    shellcheck \
-    libxml2-utils \
+    curl \
    git \
-    python3-pip \
+    libxml2-utils \
    pylint \
+    python3-pip \
+    shellcheck \
    yamllint \
    && pip3 install codespell PyGithub boto3 unidiff dohq-artifactory

+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+
+# Get act and actionlint from releases (both are .tar.gz; act names the amd64 build "x86_64")
+RUN arch=${TARGETARCH:-amd64} \
+  && case $arch in \
+      amd64) act_arch=x86_64 ;; \
+      arm64) act_arch=$arch ;; \
+      *) echo "Unsupported TARGETARCH: $arch" >&2; exit 1 ;; \
+    esac \
+  && curl -f -o /tmp/act.tgz -L "https://github.com/nektos/act/releases/download/v${ACT_VERSION}/act_Linux_${act_arch}.tar.gz" \
+  && tar xf /tmp/act.tgz -C /usr/bin act \
+  && rm /tmp/act.tgz \
+  && curl -f -o /tmp/actionlint.tgz -L \
+    "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_${arch}.tar.gz" \
+  && tar xf /tmp/actionlint.tgz -C /usr/bin actionlint \
+  && rm /tmp/actionlint.tgz
+
+
 COPY run.sh /
 COPY process_style_check_result.py /
 CMD ["/bin/bash", "/run.sh"]
--- a/docker/test/style/process_style_check_result.py
+++ b/docker/test/style/process_style_check_result.py
@ -11,40 +11,7 @@ def process_result(result_folder):
    description = ""
    test_results = []

-    style_log_path = '{}/style_output.txt'.format(result_folder)
-    if not os.path.exists(style_log_path):
-        logging.info("No style check log on path %s", style_log_path)
-        return "exception", "No style check log", []
-    elif os.stat(style_log_path).st_size != 0:
-        description += "Style check failed. "
-        test_results.append(("Style check", "FAIL"))
-        status = "failure"  # Disabled for now
-    else:
-        test_results.append(("Style check", "OK"))
-
-    typos_log_path = '{}/typos_output.txt'.format(result_folder)
-    if not os.path.exists(style_log_path):
-        logging.info("No typos check log on path %s", style_log_path)
-        return "exception", "No typos check log", []
-    elif os.stat(typos_log_path).st_size != 0:
-        description += "Typos check failed. "
-        test_results.append(("Typos check", "FAIL"))
-        status = "failure"
-    else:
-        test_results.append(("Typos check", "OK"))
-
-    whitespaces_log_path = '{}/whitespaces_output.txt'.format(result_folder)
-    if not os.path.exists(style_log_path):
-        logging.info("No whitespaces check log on path %s", style_log_path)
-        return "exception", "No whitespaces check log", []
-    elif os.stat(whitespaces_log_path).st_size != 0:
-        description += "Whitespaces check failed. "
-        test_results.append(("Whitespaces check", "FAIL"))
-        status = "failure"
-    else:
-        test_results.append(("Whitespaces check", "OK"))
-
-    duplicate_log_path = '{}/duplicate_output.txt'.format(result_folder)
+    duplicate_log_path = "{}/duplicate_output.txt".format(result_folder)
    if not os.path.exists(duplicate_log_path):
        logging.info("No header duplicates check log on path %s", duplicate_log_path)
        return "exception", "No header duplicates check log", []
@ -55,7 +22,7 @@ def process_result(result_folder):
    else:
        test_results.append(("Header duplicates check", "OK"))

-    shellcheck_log_path = '{}/shellcheck_output.txt'.format(result_folder)
+    shellcheck_log_path = "{}/shellcheck_output.txt".format(result_folder)
    if not os.path.exists(shellcheck_log_path):
        logging.info("No shellcheck  log on path %s", shellcheck_log_path)
        return "exception", "No shellcheck log", []
@ -66,6 +33,50 @@ def process_result(result_folder):
    else:
        test_results.append(("Shellcheck", "OK"))

+    style_log_path = "{}/style_output.txt".format(result_folder)
+    if not os.path.exists(style_log_path):
+        logging.info("No style check log on path %s", style_log_path)
+        return "exception", "No style check log", []
+    elif os.stat(style_log_path).st_size != 0:
+        description += "Style check failed. "
+        test_results.append(("Style check", "FAIL"))
+        status = "failure"
+    else:
+        test_results.append(("Style check", "OK"))
+
+    typos_log_path = "{}/typos_output.txt".format(result_folder)
+    if not os.path.exists(typos_log_path):
+        logging.info("No typos check log on path %s", typos_log_path)
+        return "exception", "No typos check log", []
+    elif os.stat(typos_log_path).st_size != 0:
+        description += "Typos check failed. "
+        test_results.append(("Typos check", "FAIL"))
+        status = "failure"
+    else:
+        test_results.append(("Typos check", "OK"))
+
+    whitespaces_log_path = "{}/whitespaces_output.txt".format(result_folder)
+    if not os.path.exists(whitespaces_log_path):
+        logging.info("No whitespaces check log on path %s", whitespaces_log_path)
+        return "exception", "No whitespaces check log", []
+    elif os.stat(whitespaces_log_path).st_size != 0:
+        description += "Whitespaces check failed. "
+        test_results.append(("Whitespaces check", "FAIL"))
+        status = "failure"
+    else:
+        test_results.append(("Whitespaces check", "OK"))
+
+    workflows_log_path = "{}/workflows_output.txt".format(result_folder)
+    if not os.path.exists(workflows_log_path):
+        logging.info("No workflows check log on path %s", workflows_log_path)
+        return "exception", "No workflows check log", []
+    elif os.stat(workflows_log_path).st_size != 0:  # non-empty log => check failed
+        description += "Workflows check failed. "
+        test_results.append(("Workflows check", "FAIL"))
+        status = "failure"
+    else:
+        test_results.append(("Workflows check", "OK"))
+
    if not description:
        description += "Style check success"

@ -73,20 +84,22 @@ def process_result(result_folder):


 def write_results(results_file, status_file, results, status):
-    with open(results_file, 'w') as f:
-        out = csv.writer(f, delimiter='\t')
+    with open(results_file, "w") as f:
+        out = csv.writer(f, delimiter="\t")
        out.writerows(results)
-    with open(status_file, 'w') as f:
-        out = csv.writer(f, delimiter='\t')
+    with open(status_file, "w") as f:
+        out = csv.writer(f, delimiter="\t")
        out.writerow(status)


 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
-    parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of style check")
-    parser.add_argument("--in-results-dir", default='/test_output/')
-    parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
-    parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
+    parser = argparse.ArgumentParser(
+        description="ClickHouse script for parsing results of style check"
+    )
+    parser.add_argument("--in-results-dir", default="/test_output/")
+    parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
+    parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
    args = parser.parse_args()

    state, description, test_results = process_result(args.in_results_dir)
--- a/docker/test/style/run.sh
+++ b/docker/test/style/run.sh
@ -3,9 +3,10 @@
 # yaml check is not the best one

 cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
+./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
 ./check-style -n              |& tee /test_output/style_output.txt
 ./check-typos                 |& tee /test_output/typos_output.txt
 ./check-whitespaces -n        |& tee /test_output/whitespaces_output.txt
-./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
+./check-workflows             |& tee /test_output/workflows_output.txt
 ./shellcheck-run.sh           |& tee /test_output/shellcheck_output.txt
 /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
--- a/docker/test/unit/Dockerfile
+++ b/docker/test/unit/Dockerfile
@ -1,5 +1,7 @@
+# rebuild in #33610
 # docker build -t clickhouse/unit-test .
-FROM clickhouse/stateless-test
+ARG FROM_TAG=latest
+FROM clickhouse/stateless-test:$FROM_TAG

 RUN apt-get install gdb

--- a/docker/test/util/Dockerfile
+++ b/docker/test/util/Dockerfile
@ -1,3 +1,4 @@
+# rebuild in #33610
 # docker build -t clickhouse/test-util .

 FROM ubuntu:20.04
--- a/docs/en/development/architecture.md
+++ b/docs/en/development/architecture.md
@ -175,7 +175,7 @@ When we are going to read something from a part in `MergeTree`, we look at `prim

 When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. That’s why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts.

-`MergeTree` is not an LSM tree because it does not contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications.
+`MergeTree` is not an LSM tree because it does not contain MEMTABLE and LOG: inserted data is written directly to the filesystem. This behavior makes MergeTree much more suitable for inserting data in batches. Therefore, frequently inserting small numbers of rows is not ideal for MergeTree. For example, a couple of rows per second is OK, but doing it a thousand times a second is not optimal for MergeTree. However, there is an async insert mode for small inserts to overcome this limitation. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications.

 There are MergeTree engines that are doing additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as special support for updates. Keep in mind that these are not real updates because users usually have no control over the time when background merges are executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form.

--- a/docs/en/engines/table-engines/integrations/hive.md
+++ b/docs/en/engines/table-engines/integrations/hive.md
@ -0,0 +1,408 @@
+---
+toc_priority: 4
+toc_title: Hive
+---
+
+# Hive {#hive}
+
+The Hive engine allows you to perform `SELECT` queries on an HDFS Hive table. Currently it supports the following input formats:
+
+- Text: only supports simple scalar column types except `binary`
+
+- ORC: supports simple scalar column types except `char`; only supports complex types like `array`
+
+- Parquet: supports all simple scalar column types; only supports complex types like `array`
+
+## Creating a Table {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [ALIAS expr1],
+    name2 [type2] [ALIAS expr2],
+    ...
+) ENGINE = Hive('thrift://host:port', 'database', 'table')
+PARTITION BY expr
+```
+See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
+
+The table structure can differ from the original Hive table structure:
+-   Column names should be the same as in the original Hive table, but you can use just some of these columns and in any order, also you can use some alias columns calculated from other columns.
+-   Column types should be the same as those in the original Hive table.
+-   Partition by expression should be consistent with the original Hive table, and columns in partition by expression should be in the table structure.
+
+**Engine Parameters**
+
+-   `thrift://host:port` — Hive Metastore address
+
+-   `database` — Remote database name.
+
+-   `table` — Remote table name.
+
+## Usage Example {#usage-example}
+
+### How to Use Local Cache for HDFS Filesystem
+We strongly advise you to enable local cache for remote filesystems. Benchmarks show that it is almost 2x faster with cache.
+
+Before using cache, add it to `config.xml`
+``` xml
+<local_cache_for_remote_fs>
+    <enable>true</enable>
+    <root_dir>local_cache</root_dir>
+    <limit_size>559096952</limit_size>
+    <bytes_read_before_flush>1048576</bytes_read_before_flush>
+</local_cache_for_remote_fs>
+```
+
+- enable: ClickHouse will maintain local cache for remote filesystem(HDFS) after startup if true.
+- root_dir: Required. The root directory to store local cache files for remote filesystem.
+- limit_size: Required. The maximum size(in bytes) of local cache files.
+- bytes_read_before_flush: Control bytes before flush to local filesystem when downloading file from remote filesystem. The default value is 1MB.
+
+When ClickHouse is started up with local cache for remote filesystem enabled, users can still choose not to use cache with `settings use_local_cache_for_remote_fs = 0` in their query. `use_local_cache_for_remote_fs` is `false` by default.
+
+### Query Hive Table with ORC Input Format
+
+#### Create Table in Hive
+``` text
+hive > CREATE TABLE `test`.`test_orc`(
+  `f_tinyint` tinyint, 
+  `f_smallint` smallint, 
+  `f_int` int, 
+  `f_integer` int, 
+  `f_bigint` bigint, 
+  `f_float` float, 
+  `f_double` double, 
+  `f_decimal` decimal(10,0), 
+  `f_timestamp` timestamp, 
+  `f_date` date, 
+  `f_string` string, 
+  `f_varchar` varchar(100), 
+  `f_bool` boolean, 
+  `f_binary` binary, 
+  `f_array_int` array<int>, 
+  `f_array_string` array<string>, 
+  `f_array_float` array<float>, 
+  `f_array_array_int` array<array<int>>, 
+  `f_array_array_string` array<array<string>>, 
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY ( 
+  `day` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_orc'
+
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_orc;
+OK
+1	2	3	4	5	6.11	7.22	8	2021-11-05 12:38:16.314	2021-11-05	hello world	hello world	hello world                                                                                         	true	hello world	[1,2,3]	["hello world","hello world"]	[1.1,1.2]	[[1,2],[3,4]]	[["a","b"],["c","d"]]	[[1.11,2.22],[3.33,4.44]]	2021-09-18
+Time taken: 0.295 seconds, Fetched: 1 row(s)
+```
+
+#### Create Table in ClickHouse
+Table in ClickHouse, retrieving data from the Hive table created above:
+``` sql
+CREATE TABLE test.test_orc
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_bool` Bool,
+    `f_binary` String,
+    `f_array_int` Array(Int32),
+    `f_array_string` Array(String),
+    `f_array_float` Array(Float32),
+    `f_array_array_int` Array(Array(Int32)),
+    `f_array_array_string` Array(Array(String)),
+    `f_array_array_float` Array(Array(Float32)),
+    `day` String
+)
+ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+PARTITION BY day
+
+```
+
+``` sql
+SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test.test_orc
+SETTINGS input_format_orc_allow_missing_columns = 1
+
+Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
+
+Row 1:
+──────
+f_tinyint:            1
+f_smallint:           2
+f_int:                3
+f_integer:            4
+f_bigint:             5
+f_float:              6.11
+f_double:             7.22
+f_decimal:            8
+f_timestamp:          2021-12-04 04:00:44
+f_date:               2021-12-03
+f_string:             hello world
+f_varchar:            hello world
+f_bool:               true
+f_binary:             hello world
+f_array_int:          [1,2,3]
+f_array_string:       ['hello world','hello world']
+f_array_float:        [1.1,1.2]
+f_array_array_int:    [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float:  [[1.11,2.22],[3.33,4.44]]
+day:                  2021-09-18
+
+
+1 rows in set. Elapsed: 0.078 sec. 
+```
+
+### Query Hive Table with Parquet Input Format
+
+#### Create Table in Hive
+``` text
+hive >
+CREATE TABLE `test`.`test_parquet`(
+  `f_tinyint` tinyint, 
+  `f_smallint` smallint, 
+  `f_int` int, 
+  `f_integer` int, 
+  `f_bigint` bigint, 
+  `f_float` float, 
+  `f_double` double, 
+  `f_decimal` decimal(10,0), 
+  `f_timestamp` timestamp, 
+  `f_date` date, 
+  `f_string` string, 
+  `f_varchar` varchar(100), 
+  `f_char` char(100), 
+  `f_bool` boolean, 
+  `f_binary` binary, 
+  `f_array_int` array<int>, 
+  `f_array_string` array<string>, 
+  `f_array_float` array<float>, 
+  `f_array_array_int` array<array<int>>, 
+  `f_array_array_string` array<array<string>>, 
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY ( 
+  `day` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_parquet'
+OK
+Time taken: 0.51 seconds
+
+hive >  insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_parquet;
+OK
+1	2	3	4	5	6.11	7.22	8	2021-12-14 17:54:56.743	2021-12-14	hello world	hello world	hello world                                                                                         	true	hello world	[1,2,3]	["hello world","hello world"]	[1.1,1.2]	[[1,2],[3,4]]	[["a","b"],["c","d"]]	[[1.11,2.22],[3.33,4.44]]	2021-09-18
+Time taken: 0.766 seconds, Fetched: 1 row(s)
+```
+
+#### Create Table in ClickHouse
+Table in ClickHouse, retrieving data from the Hive table created above:
+``` sql
+CREATE TABLE test.test_parquet
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_char` String,
+    `f_bool` Bool,
+    `f_binary` String,
+    `f_array_int` Array(Int32),
+    `f_array_string` Array(String),
+    `f_array_float` Array(Float32),
+    `f_array_array_int` Array(Array(Int32)),
+    `f_array_array_string` Array(Array(String)),
+    `f_array_array_float` Array(Array(Float32)),
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test_parquet
+SETTINGS input_format_parquet_allow_missing_columns = 1
+
+Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
+
+Row 1:
+──────
+f_tinyint:            1
+f_smallint:           2
+f_int:                3
+f_integer:            4
+f_bigint:             5
+f_float:              6.11
+f_double:             7.22
+f_decimal:            8
+f_timestamp:          2021-12-14 17:54:56
+f_date:               2021-12-14
+f_string:             hello world
+f_varchar:            hello world
+f_char:               hello world
+f_bool:               true
+f_binary:             hello world
+f_array_int:          [1,2,3]
+f_array_string:       ['hello world','hello world']
+f_array_float:        [1.1,1.2]
+f_array_array_int:    [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float:  [[1.11,2.22],[3.33,4.44]]
+day:                  2021-09-18
+
+1 rows in set. Elapsed: 0.357 sec. 
+```
+
+### Query Hive Table with Text Input Format
+#### Create Table in Hive
+``` text
+hive >
+CREATE TABLE `test`.`test_text`(
+  `f_tinyint` tinyint, 
+  `f_smallint` smallint, 
+  `f_int` int, 
+  `f_integer` int, 
+  `f_bigint` bigint, 
+  `f_float` float, 
+  `f_double` double, 
+  `f_decimal` decimal(10,0), 
+  `f_timestamp` timestamp, 
+  `f_date` date, 
+  `f_string` string, 
+  `f_varchar` varchar(100), 
+  `f_char` char(100), 
+  `f_bool` boolean, 
+  `f_binary` binary, 
+  `f_array_int` array<int>, 
+  `f_array_string` array<string>, 
+  `f_array_float` array<float>, 
+  `f_array_array_int` array<array<int>>, 
+  `f_array_array_string` array<array<string>>, 
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY ( 
+  `day` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_text'
+Time taken: 0.1 seconds, Fetched: 34 row(s)
+
+
+hive >  insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_text;
+OK
+1	2	3	4	5	6.11	7.22	8	2021-12-14 18:11:17.239	2021-12-14	hello world	hello world	hello world                                                                                         	true	hello world	[1,2,3]	["hello world","hello world"]	[1.1,1.2]	[[1,2],[3,4]]	[["a","b"],["c","d"]]	[[1.11,2.22],[3.33,4.44]]	2021-09-18
+Time taken: 0.624 seconds, Fetched: 1 row(s)
+```
+
+#### Create Table in ClickHouse
+
+Table in ClickHouse, retrieving data from the Hive table created above:
+``` sql
+CREATE TABLE test.test_text
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_char` String,
+    `f_bool` Bool,
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
+PARTITION BY day 
+```
+
+``` sql
+SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
+```
+
+``` text
+SELECT *
+FROM test.test_text
+SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
+
+Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
+
+Row 1:
+──────
+f_tinyint:   1
+f_smallint:  2
+f_int:       3
+f_integer:   4
+f_bigint:    5
+f_float:     6.11
+f_double:    7.22
+f_decimal:   8
+f_timestamp: 2021-12-14 18:11:17
+f_date:      2021-12-14
+f_string:    hello world
+f_varchar:   hello world
+f_char:      hello world
+f_bool:      true
+day:         2021-09-18
+```
--- a/docs/en/engines/table-engines/integrations/index.md
+++ b/docs/en/engines/table-engines/integrations/index.md
@ -20,3 +20,4 @@ List of supported integrations:
 -   [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
 -   [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
 -   [SQLite](../../../engines/table-engines/integrations/sqlite.md)
+-   [Hive](../../../engines/table-engines/integrations/hive.md)
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@ -66,4 +66,14 @@ SELECT COUNT() FROM mongo_table;
 └─────────┘
 ```

+You can also adjust connection timeout:
+
+``` sql
+CREATE TABLE mongo_table
+(
+    key UInt64,
+    data String
+) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'connectTimeoutMS=100000');
+```
+
 [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mongodb/) <!--hide-->
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@ -178,7 +178,7 @@ toc_title: Adopters
 | <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
 | <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
 | <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
-| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Macin product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
+| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
 | <a href="https://www.yellowfinbi.com" class="favicon"><COMPANYNAME></a> | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
 | <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
 | <a href="https://www.your-analytics.org/" class="favicon">Your Analytics</a> | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |
--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@ -16,7 +16,7 @@ This query tries to initialize an unscheduled merge of data parts for tables.
 OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
 ```

-The `OPTMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.
+The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.

 When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `2`) or on current replica (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `1`).

--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -1,23 +1,17 @@
 #include <stdlib.h>
 #include <fcntl.h>
-#include <signal.h>
 #include <map>
 #include <iostream>
-#include <fstream>
 #include <iomanip>
-#include <unordered_set>
-#include <algorithm>
 #include <optional>
 #include <base/scope_guard_safe.h>
 #include <boost/program_options.hpp>
 #include <boost/algorithm/string/replace.hpp>
-#include <Poco/String.h>
 #include <filesystem>
 #include <string>
 #include "Client.h"
 #include "Core/Protocol.h"

-#include <base/argsToConfig.h>
 #include <base/find_symbols.h>

 #include <Common/config_version.h>
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -58,6 +58,8 @@
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Storages/System/attachSystemTables.h>
 #include <Storages/System/attachInformationSchemaTables.h>
+#include <Storages/Cache/ExternalDataSourceCache.h>
+#include <Storages/Cache/registerRemoteFileMetadatas.h>
 #include <AggregateFunctions/registerAggregateFunctions.h>
 #include <Functions/registerFunctions.h>
 #include <TableFunctions/registerTableFunctions.h>
@ -525,6 +527,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
    registerDictionaries();
    registerDisks();
    registerFormats();
+    registerRemoteFileMetadatas();

    CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
    CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
@ -559,6 +562,21 @@ if (ThreadFuzzer::instance().isEffective())
        config().getUInt("thread_pool_queue_size", 10000)
    );

+
+    /// Initialize global local cache for remote filesystem.
+    if (config().has("local_cache_for_remote_fs"))
+    {
+        bool enable = config().getBool("local_cache_for_remote_fs.enable", false);
+        if (enable)
+        {
+            String root_dir = config().getString("local_cache_for_remote_fs.root_dir");
+            UInt64 limit_size = config().getUInt64("local_cache_for_remote_fs.limit_size");
+            UInt64 bytes_read_before_flush
+                = config().getUInt64("local_cache_for_remote_fs.bytes_read_before_flush", DBMS_DEFAULT_BUFFER_SIZE);
+            ExternalDataSourceCache::instance().initOnce(global_context, root_dir, limit_size, bytes_read_before_flush);
+        }
+    }
+
    Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));
    std::mutex servers_lock;
    std::vector<ProtocolServerAdapter> servers;
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -115,6 +115,11 @@ if (USE_HDFS)
    add_headers_and_sources(dbms Disks/HDFS)
 endif()

+add_headers_and_sources(dbms Storages/Cache)
+if (USE_HIVE)
+    add_headers_and_sources(dbms Storages/Hive)
+endif()
+
 if(USE_FILELOG)
    add_headers_and_sources(dbms Storages/FileLog)
 endif()
@ -448,6 +453,12 @@ if (USE_HDFS)
    dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
 endif()

+if (USE_HIVE)
+    dbms_target_link_libraries(PRIVATE hivemetastore)
+    dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore)
+endif()
+
+
 if (USE_AWS_S3)
    target_link_libraries (clickhouse_common_io PUBLIC ${AWS_S3_LIBRARY})
    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_CORE_INCLUDE_DIR})
@ -469,6 +480,11 @@ if (USE_BROTLI)
    target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
 endif()

+if (USE_SNAPPY)
+    target_link_libraries (clickhouse_common_io PUBLIC ${SNAPPY_LIBRARY})
+    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${SNAPPY_INCLUDE_DIR})
+endif()
+
 if (USE_AMQPCPP)
    dbms_target_link_libraries(PUBLIC ${AMQPCPP_LIBRARY})
    dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${AMQPCPP_INCLUDE_DIR})
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -1017,7 +1017,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des

        String current_format = parsed_insert_query->format;
        if (current_format.empty())
-            current_format = FormatFactory::instance().getFormatFromFileName(in_file);
+            current_format = FormatFactory::instance().getFormatFromFileName(in_file, true);

        /// Create temporary storage file, to support globs and parallel reading
        StorageFile::CommonArguments args{
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@ -606,6 +606,9 @@
    M(635, CANNOT_POLL) \
    M(636, CANNOT_EXTRACT_TABLE_STRUCTURE) \
    M(637, INVALID_TABLE_OVERRIDE) \
+    M(638, SNAPPY_UNCOMPRESS_FAILED) \
+    M(639, SNAPPY_COMPRESS_FAILED) \
+    M(640, NO_HIVEMETASTORE) \
    \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \
--- a/src/Common/LRUResourceCache.h
+++ b/src/Common/LRUResourceCache.h
@ -12,9 +12,15 @@
 namespace DB
 {
 template <typename T>
-struct TrivailLRUResourceCacheWeightFunction
+struct TrivialLRUResourceCacheWeightFunction
 {
-    size_t operator()(const T &) const { return 1; }
+    size_t operator()(const T &) const noexcept { return 1; }
+};
+
+template <typename T>
+struct TrivialLRUResourceCacheReleaseFunction
+{
+    void operator()(std::shared_ptr<T>) noexcept { }
 };

 /**
@ -24,9 +30,11 @@ struct TrivailLRUResourceCacheWeightFunction
 *
 * Warning (!): This implementation is in development, not to be used.
 */
-template <typename TKey,
+template <
+    typename TKey,
    typename TMapped,
-    typename WeightFunction = TrivailLRUResourceCacheWeightFunction<TMapped>,
+    typename WeightFunction = TrivialLRUResourceCacheWeightFunction<TMapped>,
+    typename ReleaseFunction = TrivialLRUResourceCacheReleaseFunction<TMapped>,
    typename HashFunction = std::hash<TKey>>
 class LRUResourceCache
 {
@ -38,8 +46,7 @@ public:
    class MappedHolder
    {
    public:
-        MappedHolder(LRUResourceCache * cache_, const Key & key_, MappedPtr value_)
-            : cache(cache_), key(key_), val(value_) {}
+        MappedHolder(LRUResourceCache * cache_, const Key & key_, MappedPtr value_) : cache(cache_), key(key_), val(value_) { }

        ~MappedHolder() { cache->release(key); }

@ -54,7 +61,9 @@ public:
    using MappedHolderPtr = std::unique_ptr<MappedHolder>;

    explicit LRUResourceCache(size_t max_weight_, size_t max_element_size_ = 0)
-        : max_weight(max_weight_), max_element_size(max_element_size_) {}
+        : max_weight(max_weight_), max_element_size(max_element_size_)
+    {
+    }

    MappedHolderPtr get(const Key & key)
    {
@ -86,6 +95,7 @@ public:
        {
            queue.erase(cell.queue_iterator);
            current_weight -= cell.weight;
+            release_function(cell.value);
            cells.erase(it);
        }
        else
@ -198,6 +208,7 @@ private:
    friend struct InsertTokenHolder;
    InsertTokenById insert_tokens;
    WeightFunction weight_function;
+    ReleaseFunction release_function;
    std::atomic<size_t> hits{0};
    std::atomic<size_t> misses{0};
    std::atomic<size_t> evict_count{0};
@ -305,6 +316,7 @@ private:
        {
            queue.erase(cell.queue_iterator);
            current_weight -= cell.weight;
+            release_function(cell.value);
            cells.erase(it);
        }
    }
@ -330,12 +342,11 @@ private:
    // key mustn't be in the cache
    Cell * set(const Key & insert_key, MappedPtr value)
    {
-        auto weight = value ? weight_function(*value) : 0;
-        auto queue_size = cells.size() + 1;
-        auto loss_weight = 0;
-
+        size_t weight = value ? weight_function(*value) : 0;
+        size_t queue_size = cells.size() + 1;
+        size_t loss_weight = 0;
        auto is_overflow = [&] {
-            return current_weight + weight - loss_weight > max_weight || (max_element_size != 0 && queue_size > max_element_size);
+            return current_weight + weight > max_weight + loss_weight || (max_element_size != 0 && queue_size > max_element_size);
        };

        auto key_it = queue.begin();
@ -356,7 +367,7 @@ private:
            if (cell.reference_count == 0)
            {
                loss_weight += cell.weight;
-                queue_size -= 1;
+                queue_size--;
                to_release_keys.insert(key);
            }

@ -376,6 +387,7 @@ private:
        {
            auto & cell = cells[key];
            queue.erase(cell.queue_iterator);
+            release_function(cell.value);
            cells.erase(key);
            ++evict_count;
        }
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -277,6 +277,8 @@
    \
    M(AsynchronousReadWaitMicroseconds, "Time spent in waiting for asynchronous reads.") \
    \
+    M(ExternalDataSourceLocalCacheReadBytes, "Bytes read from local cache buffer in RemoteReadBufferCache")\
+    \
    M(MainConfigLoads, "Number of times the main configuration was reloaded.") \

 namespace ProfileEvents
--- a/src/Common/ZooKeeper/ZooKeeperConstants.h
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.h
@ -39,6 +39,7 @@ std::string toString(OpNum op_num);
 OpNum getOpNum(int32_t raw_op_num);

 static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION = 0;
+static constexpr int32_t KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT = 42;
 static constexpr int32_t CLIENT_HANDSHAKE_LENGTH = 44;
 static constexpr int32_t CLIENT_HANDSHAKE_LENGTH_WITH_READONLY = 45;
 static constexpr int32_t SERVER_HANDSHAKE_LENGTH = 36;
--- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp
@ -489,7 +489,15 @@ void ZooKeeper::receiveHandshake()

    read(protocol_version_read);
    if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION)
-        throw Exception("Unexpected protocol version: " + DB::toString(protocol_version_read), Error::ZMARSHALLINGERROR);
+    {
+        /// Special way to tell a client that server is not ready to serve it.
+        /// It's better for faster failover than just connection drop.
+        /// Implemented in clickhouse-keeper.
+        if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT)
+            throw Exception("Keeper server rejected the connection during the handshake. Possibly it's overloaded, doesn't see leader or stale", Error::ZCONNECTIONLOSS);
+        else
+            throw Exception("Unexpected protocol version: " + DB::toString(protocol_version_read), Error::ZMARSHALLINGERROR);
+    }

    read(timeout);
    if (timeout != session_timeout.totalMilliseconds())
--- a/src/Common/config.h.in
+++ b/src/Common/config.h.in
@ -20,3 +20,5 @@
 #cmakedefine01 USE_YAML_CPP
 #cmakedefine01 CLICKHOUSE_SPLIT_BINARY
 #cmakedefine01 USE_BZIP2
+#cmakedefine01 USE_SNAPPY
+#cmakedefine01 USE_HIVE
--- a/src/Common/examples/CMakeLists.txt
+++ b/src/Common/examples/CMakeLists.txt
@ -80,3 +80,6 @@ target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io)

 add_executable (executable_udf executable_udf.cpp)
 target_link_libraries (executable_udf PRIVATE dbms)
+
+add_executable(hive_metastore_client hive_metastore_client.cpp)
+target_link_libraries (hive_metastore_client PUBLIC hivemetastore ${THRIFT_LIBRARY})
--- a/src/Common/examples/hive_metastore_client.cpp
+++ b/src/Common/examples/hive_metastore_client.cpp
@ -0,0 +1,43 @@
+#include <iostream>
+
+#include <thrift/protocol/TBinaryProtocol.h>
+#include <thrift/transport/TSocket.h>
+#include <thrift/transport/TTransportUtils.h>
+#include <ThriftHiveMetastore.h>
+
+
+using namespace std;
+using namespace apache::thrift;
+using namespace apache::thrift::protocol;
+using namespace apache::thrift::transport;
+using namespace Apache::Hadoop::Hive;
+
+/// Example program: connects to a Hive metastore at localhost:9083 and prints the description
+/// of the table default.persons followed by the partitions returned for it.
+///
+/// Returns 0 on success and 1 when a Thrift exception was raised, so that scripts invoking the
+/// example can detect the failure.
+int main()
+{
+    std::shared_ptr<TTransport> socket = std::make_shared<TSocket>("localhost", 9083);
+    std::shared_ptr<TTransport> transport = std::make_shared<TBufferedTransport>(socket);
+    std::shared_ptr<TProtocol> protocol = std::make_shared<TBinaryProtocol>(transport);
+    ThriftHiveMetastoreClient client(protocol);
+
+    try
+    {
+        transport->open();
+
+        Table table;
+        client.get_table(table, "default", "persons");
+        table.printTo(std::cout);
+
+        std::vector<Partition> partitions;
+        client.get_partitions(partitions, "default", "persons", 0);
+        for (const auto & part : partitions)
+            part.printTo(std::cout);
+
+        transport->close();
+    }
+    catch (const TException & tx)
+    {
+        /// Diagnostics go to stderr so they are not mixed with the printed metadata.
+        std::cerr << "ERROR: " << tx.what() << std::endl;
+        return 1;
+    }
+
+    return 0;
+}
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@ -168,7 +168,7 @@ void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::st
    auto max_session_id = deserializeSessionAndTimeout(storage, reader);
    LOG_INFO(log, "Sessions and timeouts deserialized");

-    storage.session_id_counter = max_session_id;
+    storage.session_id_counter = max_session_id + 1; /// session_id_counter pointer to next slot
    deserializeACLMap(storage, reader);
    LOG_INFO(log, "ACLs deserialized");

--- a/src/Core/DecimalComparison.h
+++ b/src/Core/DecimalComparison.h
@ -52,7 +52,6 @@ struct DecCompareInt
    using TypeB = Type;
 };

-///
 template <typename A, typename B, template <typename, typename> typename Operation, bool _check_overflow = true,
    bool _actual = is_decimal<A> || is_decimal<B>>
 class DecimalComparison
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -544,6 +544,7 @@ class IColumn;
    \
    M(Bool, force_remove_data_recursively_on_drop, false, "Recursively remove data on DROP query. Avoids 'Directory not empty' error, but may silently remove detached data", 0) \
    M(Bool, check_table_dependencies, true, "Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies", 0) \
+    M(Bool, use_local_cache_for_remote_storage, true, "Use local cache for remote storage like HDFS or S3, it's used for remote table engine only", 0) \
    \
    /** Experimental functions */ \
    M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
@ -596,6 +597,12 @@ class IColumn;
    M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
    M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
    M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
+    M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
+    M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
+    M(Bool, input_format_arrow_allow_missing_columns, false, "Allow missing columns while reading Arrow input formats", 0) \
+    M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \
+    M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \
+    M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
    M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
    M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
    \
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@ -506,7 +506,7 @@ inline bool isNotCreatable(const T & data_type)
 inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type)
 {
    WhichDataType which(data_type);
-    return which.isInt() || which.isUInt();
+    return which.isInt() || which.isUInt() || which.isFloat();
 }

 inline bool isCompilableType(const DataTypePtr & data_type)
--- a/src/DataTypes/Serializations/SerializationNumber.cpp
+++ b/src/DataTypes/Serializations/SerializationNumber.cpp
@ -95,7 +95,7 @@ void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
 }

 template <typename T>
-void SerializationNumber<T>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+void SerializationNumber<T>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & /*settings*/) const
 {
    FieldType x;
    readCSV(x, istr);
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@ -25,6 +25,7 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
    extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT;
    extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT;
+    extern const int BAD_ARGUMENTS;
 }

 const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const
@ -62,6 +63,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number;
    format_settings.csv.null_representation = settings.format_csv_null_representation;
    format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
+    format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
+    format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
+    format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
    format_settings.custom.escaping_rule = settings.format_custom_escaping_rule;
    format_settings.custom.field_delimiter = settings.format_custom_field_delimiter;
    format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter;
@ -86,6 +90,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
    format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
    format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
+    format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
    format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
    format_settings.pretty.color = settings.output_format_pretty_color;
    format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
@ -114,7 +119,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.write_statistics = settings.output_format_write_statistics;
    format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
    format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
+    format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
    format_settings.orc.import_nested = settings.input_format_orc_import_nested;
+    format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
    format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
    format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
    format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
@ -193,7 +200,8 @@ InputFormatPtr FormatFactory::getInput(


        ParallelParsingInputFormat::Params params{
-            buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing};
+            buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing,
+               context->getApplicationType() == Context::ApplicationType::SERVER};
        return std::make_shared<ParallelParsingInputFormat>(params);
    }

@ -375,6 +383,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_
    if (target)
        throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
    target = std::move(input_creator);
+    registerFileExtension(name, name);
 }

 void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker)
@ -391,14 +400,15 @@ void FormatFactory::registerOutputFormat(const String & name, OutputCreator outp
    if (target)
        throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
    target = std::move(output_creator);
+    registerFileExtension(name, name);
 }

 void FormatFactory::registerFileExtension(const String & extension, const String & format_name)
 {
-    file_extension_formats[extension] = format_name;
+    file_extension_formats[boost::to_lower_copy(extension)] = format_name;
 }

-String FormatFactory::getFormatFromFileName(String file_name)
+String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_found)
 {
    CompressionMethod compression_method = chooseCompressionMethod(file_name, "");
    if (CompressionMethod::None != compression_method)
@ -410,11 +420,22 @@ String FormatFactory::getFormatFromFileName(String file_name)

    auto pos = file_name.find_last_of('.');
    if (pos == String::npos)
+    {
+        if (throw_if_not_found)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by it's extension");
        return "";
+    }

    String file_extension = file_name.substr(pos + 1, String::npos);
    boost::algorithm::to_lower(file_extension);
-    return file_extension_formats[file_extension];
+    auto it = file_extension_formats.find(file_extension);
+    if (it == file_extension_formats.end())
+    {
+        if (throw_if_not_found)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by it's extension");
+        return "";
+    }
+    return it->second;
 }

 void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine)
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@ -173,7 +173,7 @@ public:

    /// Register file extension for format
    void registerFileExtension(const String & extension, const String & format_name);
-    String getFormatFromFileName(String file_name);
+    String getFormatFromFileName(String file_name, bool throw_if_not_found = false);

    /// Register schema readers for format its name.
    void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator);
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -1,5 +1,6 @@
 #pragma once

+#include <Core/Names.h>
 #include <base/types.h>


@ -71,6 +72,7 @@ struct FormatSettings
        UInt64 row_group_size = 1000000;
        bool low_cardinality_as_dictionary = false;
        bool import_nested = false;
+        bool allow_missing_columns = false;
    } arrow;

    struct
@ -99,6 +101,14 @@ struct FormatSettings
        char tuple_delimiter = ',';
    } csv;

+    struct HiveText
+    {
+        char fields_delimiter = '\x01';
+        char collection_items_delimiter = '\x02';
+        char map_keys_delimiter = '\x03';
+        Names input_field_names;
+    } hive_text;
+
    struct Custom
    {
        std::string result_before_delimiter;
@ -124,6 +134,7 @@ struct FormatSettings
    {
        UInt64 row_group_size = 1000000;
        bool import_nested = false;
+        bool allow_missing_columns = false;
    } parquet;

    struct Pretty
@ -202,6 +213,7 @@ struct FormatSettings
    struct
    {
        bool import_nested = false;
+        bool allow_missing_columns = false;
        int64_t row_batch_size = 100'000;
    } orc;

--- a/src/Formats/registerFormats.cpp
+++ b/src/Formats/registerFormats.cpp
@ -77,6 +77,10 @@ void registerInputFormatJSONAsString(FormatFactory & factory);
 void registerInputFormatLineAsString(FormatFactory & factory);
 void registerInputFormatCapnProto(FormatFactory & factory);

+#if USE_HIVE
+void registerInputFormatHiveText(FormatFactory & factory);
+#endif
+
 /// Non trivial prefix and suffix checkers for disabling parallel parsing.
 void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory);
 void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory);
@ -103,6 +107,8 @@ void registerTSKVSchemaReader(FormatFactory & factory);
 void registerValuesSchemaReader(FormatFactory & factory);
 void registerTemplateSchemaReader(FormatFactory & factory);

+void registerFileExtensions(FormatFactory & factory);
+
 void registerFormats()
 {
    auto & factory = FormatFactory::instance();
@ -169,6 +175,9 @@ void registerFormats()
    registerInputFormatRegexp(factory);
    registerInputFormatJSONAsString(factory);
    registerInputFormatLineAsString(factory);
+#if USE_HIVE
+    registerInputFormatHiveText(factory);
+#endif

    registerInputFormatCapnProto(factory);

@ -196,16 +205,6 @@ void registerFormats()
    registerTSKVSchemaReader(factory);
    registerValuesSchemaReader(factory);
    registerTemplateSchemaReader(factory);
-
-    factory.registerFileExtension("csv", "CSV");
-    factory.registerFileExtension("tsv", "TSV");
-    factory.registerFileExtension("parquet", "Parquet");
-    factory.registerFileExtension("orc", "ORC");
-    factory.registerFileExtension("native", "Native");
-    factory.registerFileExtension("json", "JSON");
-    factory.registerFileExtension("ndjson", "JSONEachRow");
-    factory.registerFileExtension("xml", "XML");
-    factory.registerFileExtension("avro", "Avro");
 }

 }
--- a/src/Functions/FunctionsComparison.h
+++ b/src/Functions/FunctionsComparison.h
@ -11,17 +11,17 @@
 #include <Columns/ColumnTuple.h>
 #include <Columns/ColumnArray.h>

-#include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypeNullable.h>
-#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeDateTime64.h>
-#include <DataTypes/DataTypeDate.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeUUID.h>
-#include <DataTypes/DataTypeFixedString.h>
-#include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypeFixedString.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeUUID.h>
+#include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/getLeastSupertype.h>

 #include <Interpreters/convertFieldToType.h>
@ -1175,8 +1175,11 @@ public:
        const bool left_is_num = col_left_untyped->isNumeric();
        const bool right_is_num = col_right_untyped->isNumeric();

-        const bool left_is_string = isStringOrFixedString(which_left);
-        const bool right_is_string = isStringOrFixedString(which_right);
+        const bool left_is_string = which_left.isStringOrFixedString();
+        const bool right_is_string = which_right.isStringOrFixedString();
+
+        const bool left_is_float = which_left.isFloat();
+        const bool right_is_float = which_right.isFloat();

        bool date_and_datetime = (which_left.idx != which_right.idx) && (which_left.isDate() || which_left.isDate32() || which_left.isDateTime() || which_left.isDateTime64())
            && (which_right.isDate() || which_right.isDate32() || which_right.isDateTime() || which_right.isDateTime64());
@ -1232,11 +1235,23 @@ public:
            }
            else
            {
-                // compare
+                /// Check whether the other data type is comparable to Decimal (this includes Int and Float).
                if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime)
                    throw Exception(
                        "No operation " + getName() + " between " + left_type->getName() + " and " + right_type->getName(),
                        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                /// When a Decimal is compared with Float32/64, both sides are converted to Float64.
+                /// Other systems such as MySQL and Spark do the same.
+                if (left_is_float || right_is_float)
+                {
+                    const auto converted_type = std::make_shared<DataTypeFloat64>();
+                    ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, converted_type);
+                    ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, converted_type);
+
+                    auto new_arguments
+                        = ColumnsWithTypeAndName{{c0_converted, converted_type, "left"}, {c1_converted, converted_type, "right"}};
+                    return executeImpl(new_arguments, result_type, input_rows_count);
+                }
                return executeDecimal(col_with_type_and_name_left, col_with_type_and_name_right);
            }

--- a/src/IO/CompressionMethod.cpp
+++ b/src/IO/CompressionMethod.cpp
@ -14,6 +14,7 @@
 #include <IO/Lz4InflatingReadBuffer.h>
 #include <IO/Bzip2ReadBuffer.h>
 #include <IO/Bzip2WriteBuffer.h>
+#include <IO/HadoopSnappyReadBuffer.h>

 #include <Common/config.h>

@ -46,6 +47,8 @@ std::string toContentEncodingName(CompressionMethod method)
            return "lz4";
        case CompressionMethod::Bzip2:
            return "bz2";
+        case CompressionMethod::Snappy:
+            return "snappy";
        case CompressionMethod::None:
            return "";
    }
@ -79,11 +82,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s
        return CompressionMethod::Lz4;
    if (method_str == "bz2")
        return CompressionMethod::Bzip2;
+    if (method_str == "snappy")
+        return CompressionMethod::Snappy;
    if (hint.empty() || hint == "auto" || hint == "none")
        return CompressionMethod::None;

    throw Exception(
-        "Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'lz4', 'bz2' are supported as compression methods",
+        "Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'lz4', 'bz2', 'snappy' are supported as compression methods",
        ErrorCodes::NOT_IMPLEMENTED);
 }

@ -107,6 +112,11 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
    if (method == CompressionMethod::Bzip2)
        return std::make_unique<Bzip2ReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
 #endif
+#if USE_SNAPPY
+    if (method == CompressionMethod::Snappy)
+        return std::make_unique<HadoopSnappyReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
+#endif
+
    if (method == CompressionMethod::None)
        return nested;

@ -136,6 +146,10 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
 #if USE_BZIP2
    if (method == CompressionMethod::Bzip2)
        return std::make_unique<Bzip2WriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
+#endif
+#if USE_SNAPPY
+    if (method == CompressionMethod::Snappy)
+        throw Exception("Unsupported compression method", ErrorCodes::NOT_IMPLEMENTED);
 #endif
    if (method == CompressionMethod::None)
        return nested;
--- a/src/IO/CompressionMethod.h
+++ b/src/IO/CompressionMethod.h
@ -33,7 +33,8 @@ enum class CompressionMethod
    Zstd,
    Brotli,
    Lz4,
-    Bzip2
+    Bzip2,
+    Snappy,
 };

 /// How the compression method is named in HTTP.
--- a/src/IO/HadoopSnappyReadBuffer.cpp
+++ b/src/IO/HadoopSnappyReadBuffer.cpp
@ -0,0 +1,218 @@
+#include <Common/config.h>
+
+#if USE_SNAPPY
+#include <fcntl.h>
+#include <sys/types.h>
+#include <memory>
+#include <string>
+#include <string.h>
+
+#include <snappy-c.h>
+
+#include "HadoopSnappyReadBuffer.h"
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SNAPPY_UNCOMPRESS_FAILED;
+}
+
+
+/// Returns true when the number of staged bytes lies in the half-open range [0, max).
+inline bool HadoopSnappyDecoder::checkBufferLength(int max) const
+{
+    if (buffer_length < 0)
+        return false;
+    return buffer_length < max;
+}
+
+/// Returns true when at least `min` input bytes are available
+/// (`min` is converted to size_t for the comparison).
+inline bool HadoopSnappyDecoder::checkAvailIn(size_t avail_in, int min)
+{
+    const auto required = static_cast<size_t>(min);
+    return !(avail_in < required);
+}
+
+/// Moves all remaining input bytes into the internal staging buffer so that they are still
+/// available when the next chunk of input arrives.
+///
+/// The bytes are appended after whatever is already staged: a length prefix or a compressed
+/// sub-block may be spread over more than two input chunks, and overwriting the bytes staged by
+/// an earlier call would silently drop a part of the stream.
+///
+/// On return *avail_in is 0 and *next_in points just past the consumed bytes.
+inline void HadoopSnappyDecoder::copyToBuffer(size_t * avail_in, const char ** next_in)
+{
+    assert(buffer_length >= 0);
+    assert(static_cast<size_t>(buffer_length) + *avail_in <= sizeof(buffer));
+
+    memcpy(buffer + buffer_length, *next_in, *avail_in);
+
+    buffer_length += static_cast<int>(*avail_in);
+    *next_in += *avail_in;
+    *avail_in = 0;
+}
+
+
+/// Decodes the 4-byte big-endian unsigned integer that starts at `in`.
+inline uint32_t HadoopSnappyDecoder::readLength(const char * in)
+{
+    const auto * bytes = reinterpret_cast<const uint8_t *>(in);
+
+    /// Most significant byte comes first.
+    uint32_t value = 0;
+    for (size_t i = 0; i < 4; ++i)
+        value = (value << 8) + bytes[i];
+    return value;
+}
+
+
+/// Reads a 4-byte big-endian length prefix that may be split between the bytes already held in
+/// `buffer` and the new input.
+///
+/// Returns INVALID_INPUT when the staged byte count is not in [0, 4), NEEDS_MORE_INPUT (after
+/// passing the available input to copyToBuffer) when the input does not contain the missing bytes,
+/// and OK once the value has been stored into *length and the input cursor has been advanced.
+inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readLength(size_t * avail_in, const char ** next_in, int * length)
+{
+    if (!checkBufferLength(4))
+        return Status::INVALID_INPUT;
+
+    /// Start with the part of the prefix that was staged by a previous call.
+    char prefix[4] = {0};
+    memcpy(prefix, buffer, buffer_length);
+
+    const int missing = 4 - buffer_length;
+    if (!checkAvailIn(*avail_in, missing))
+    {
+        copyToBuffer(avail_in, next_in);
+        return Status::NEEDS_MORE_INPUT;
+    }
+
+    /// Complete the prefix from the new input and consume those bytes.
+    memcpy(prefix + buffer_length, *next_in, missing);
+    *avail_in -= missing;
+    *next_in += missing;
+    buffer_length = 0;
+
+    *length = readLength(prefix);
+    return Status::OK;
+}
+
+/// Reads the uncompressed length of the current block unless it is already known (non-negative).
+inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlockLength(size_t * avail_in, const char ** next_in)
+{
+    const bool already_known = block_length >= 0;
+    return already_known ? Status::OK : readLength(avail_in, next_in, &block_length);
+}
+
+/// Reads the compressed length of the next sub-block unless it is already known (non-negative).
+inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readCompressedLength(size_t * avail_in, const char ** next_in)
+{
+    const bool already_known = compressed_length >= 0;
+    return already_known ? Status::OK : readLength(avail_in, next_in, &compressed_length);
+}
+
+/// Decompresses one sub-block of `compressed_length` bytes into the output.
+///
+/// The sub-block may be split between bytes staged in `buffer` by earlier calls and the new input.
+/// Returns:
+///  - INVALID_INPUT when the staged byte count is not in [0, compressed_length);
+///  - BUFFER_TOO_SMALL when the sub-block has to pass through `buffer` but does not fit into it;
+///  - NEEDS_MORE_INPUT when the input does not yet contain the rest of the sub-block
+///    (the available input is handed to copyToBuffer first);
+///  - the status reported by snappy_uncompress when decompression fails;
+///  - OK after the input and output cursors have been advanced past the sub-block.
+inline HadoopSnappyDecoder::Status
+HadoopSnappyDecoder::readCompressedData(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out)
+{
+    if (!checkBufferLength(compressed_length))
+        return Status::INVALID_INPUT;
+
+    /// Positive because of the check above.
+    const size_t remaining = static_cast<size_t>(compressed_length - buffer_length);
+    const bool has_whole_sub_block = checkAvailIn(*avail_in, compressed_length - buffer_length);
+
+    /// The sub-block length is read from the input and cannot be trusted. Whenever the data has to
+    /// go through the fixed-size staging buffer, reject sub-blocks that cannot fit into it instead of
+    /// writing past its end.
+    const bool needs_staging = buffer_length > 0 || !has_whole_sub_block;
+    if (needs_staging && static_cast<size_t>(compressed_length) > sizeof(buffer))
+        return Status::BUFFER_TOO_SMALL;
+
+    if (!has_whole_sub_block)
+    {
+        copyToBuffer(avail_in, next_in);
+        return Status::NEEDS_MORE_INPUT;
+    }
+
+    /// Decompress straight from the input unless a part of the sub-block was staged earlier;
+    /// in that case complete it in `buffer` and decompress from there.
+    const char * compressed = *next_in;
+    if (buffer_length > 0)
+    {
+        memcpy(buffer + buffer_length, *next_in, remaining);
+        compressed = buffer;
+    }
+
+    /// In: capacity of the output. Out: number of bytes actually produced.
+    size_t uncompressed_length = *avail_out;
+    auto status = snappy_uncompress(compressed, compressed_length, *next_out, &uncompressed_length);
+    if (status != SNAPPY_OK)
+        return Status(status);
+
+    *avail_in -= remaining;
+    *next_in += remaining;
+    *avail_out -= uncompressed_length;
+    *next_out += uncompressed_length;
+
+    /// Ready for the length prefix of the next sub-block.
+    total_uncompressed_length += uncompressed_length;
+    compressed_length = -1;
+    buffer_length = 0;
+    return Status::OK;
+}
+
+/// Decodes one hadoop-snappy block from the input into the output.
+///
+/// The decoding state is kept in the members between calls, so a block can be supplied in several
+/// pieces: a non-OK status from one of the helpers (for example NEEDS_MORE_INPUT) is returned as is.
+/// Returns OK when the block has been decoded completely, or when there is no input and no block
+/// is in progress; returns INVALID_INPUT when the sub-blocks do not add up to the announced length.
+HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out)
+{
+    if (*avail_in == 0)
+    {
+        /// No input and nothing pending: there is simply nothing to decode.
+        if (buffer_length == 0 && block_length < 0 && compressed_length < 0)
+            return Status::OK;
+        return Status::NEEDS_MORE_INPUT;
+    }
+
+    HadoopSnappyDecoder::Status status = readBlockLength(avail_in, next_in);
+    if (status != Status::OK)
+        return status;
+
+    /// Decode sub-blocks until the announced uncompressed size of the block has been produced.
+    while (total_uncompressed_length < block_length)
+    {
+        status = readCompressedLength(avail_in, next_in);
+        if (status != Status::OK)
+            return status;
+
+        status = readCompressedData(avail_in, next_in, avail_out, next_out);
+        if (status != Status::OK)
+            return status;
+    }
+    /// The sub-blocks produced a different amount of data than the block header announced.
+    if (total_uncompressed_length != block_length)
+        return Status::INVALID_INPUT;
+    return Status::OK;
+}
+
+/// Creates a read buffer that takes ownership of `in_` and decodes it with a fresh HadoopSnappyDecoder.
+/// buf_size, existing_memory and alignment are forwarded to BufferWithOwnMemory.
+HadoopSnappyReadBuffer::HadoopSnappyReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment)
+    : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
+    , in(std::move(in_))
+    , decoder(std::make_unique<HadoopSnappyDecoder>())
+    , in_available(0)
+    , in_data(nullptr)
+    , out_capacity(0)
+    , out_data(nullptr)
+    , eof(false)
+{
+}
+
+/// Defined in the .cpp file rather than inline in the header.
+HadoopSnappyReadBuffer::~HadoopSnappyReadBuffer() = default;
+
+/// Refills the working buffer with decompressed data.
+///
+/// Returns false once the end of the stream has been reached and nothing more was produced.
+/// Throws SNAPPY_UNCOMPRESS_FAILED when the decoder reports INVALID_INPUT or BUFFER_TOO_SMALL,
+/// or when the decoder still needs input but the nested buffer has none left.
+bool HadoopSnappyReadBuffer::nextImpl()
+{
+    if (eof)
+        return false;
+
+    /// All previously fetched input has been consumed: take the next chunk from the nested buffer.
+    if (!in_available)
+    {
+        in->nextIfAtEnd();
+        in_available = in->buffer().end() - in->position();
+        in_data = in->position();
+    }
+
+    /// The previous call stopped in the middle of a block, but there is no more input to finish it.
+    if (decoder->result == Status::NEEDS_MORE_INPUT && (!in_available || in->eof()))
+    {
+        throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED);
+    }
+
+    /// Every call decompresses into the beginning of the internal buffer.
+    out_capacity = internal_buffer.size();
+    out_data = internal_buffer.begin();
+    decoder->result = decoder->readBlock(&in_available, &in_data, &out_capacity, &out_data);
+
+    /// Advance the nested buffer by the number of bytes the decoder consumed
+    /// and expose exactly the bytes produced by this call.
+    in->position() = in->buffer().end() - in_available;
+    working_buffer.resize(internal_buffer.size() - out_capacity);
+
+    if (decoder->result == Status::OK)
+    {
+        /// A whole block has been decoded: prepare the decoder for the next one.
+        decoder->reset();
+        if (in->eof())
+        {
+            eof = true;
+            return !working_buffer.empty();
+        }
+        return true;
+    }
+    else if (decoder->result == Status::INVALID_INPUT || decoder->result == Status::BUFFER_TOO_SMALL)
+    {
+        throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED);
+    }
+    /// Any other status (in practice NEEDS_MORE_INPUT): hand out what was produced so far,
+    /// decoding of the block continues on the next call.
+    return true;
+}
+
+}
+
+#endif
--- a/src/IO/HadoopSnappyReadBuffer.h
+++ b/src/IO/HadoopSnappyReadBuffer.h
@ -0,0 +1,115 @@
+#pragma once
+
+#include <Common/config.h>
+
+#if USE_SNAPPY
+
+#include <memory>
+#include <IO/ReadBuffer.h>
+#include <IO/BufferWithOwnMemory.h>
+
+namespace DB
+{
+
+
+/*
+ * Hadoop-snappy format is one of the compression formats based on Snappy used in Hadoop. It uses its own framing format as follows:
+ * 1. A compressed file consists of one or more blocks.
+ * 2. A block consists of uncompressed length (big endian 4 byte integer) and one or more subblocks.
+ * 3. A subblock consists of compressed length (big endian 4 byte integer) and raw compressed data.
+ *
+ * HadoopSnappyDecoder implements the decompression of data compressed with hadoop-snappy format.
+ */
+class HadoopSnappyDecoder
+{
+public:
+    /// Outcome of a readBlock() call.
+    enum class Status : int
+    {
+        OK = 0,
+        INVALID_INPUT = 1,
+        BUFFER_TOO_SMALL = 2,
+        NEEDS_MORE_INPUT = 3,
+    };
+
+    HadoopSnappyDecoder() = default;
+    ~HadoopSnappyDecoder() = default;
+
+    /// Decodes input starting at *next_in (*avail_in bytes available) into *next_out (*avail_out bytes of room).
+    /// NOTE(review): the definition is not in this header; the caller treats *avail_in and *avail_out
+    /// after the call as the amounts left unconsumed / unused - confirm against the implementation.
+    Status readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out);
+
+    /// Restores the framing state to the same values as the member initializers below.
+    /// Does not touch `buffer`'s contents or `result`.
+    inline void reset()
+    {
+        buffer_length = 0;
+        block_length = -1;
+        compressed_length = -1;
+        total_uncompressed_length = 0;
+    }
+
+    /// Status of the most recent readBlock() call; assigned and inspected by HadoopSnappyReadBuffer::nextImpl().
+    Status result = Status::OK;
+
+private:
+    inline bool checkBufferLength(int max) const;
+    inline static bool checkAvailIn(size_t avail_in, int min);
+
+    inline void copyToBuffer(size_t * avail_in, const char ** next_in);
+
+    inline static uint32_t readLength(const char * in);
+    inline Status readLength(size_t * avail_in, const char ** next_in, int * length);
+    inline Status readBlockLength(size_t * avail_in, const char ** next_in);
+    inline Status readCompressedLength(size_t * avail_in, const char ** next_in);
+    inline Status readCompressedData(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out);
+
+    /// NOTE(review): presumably staging space for input that spans several readBlock() calls
+    /// (see copyToBuffer), with buffer_length being the number of bytes held - confirm.
+    char buffer[DBMS_DEFAULT_BUFFER_SIZE] = {0};
+    int buffer_length = 0;
+
+    /// NOTE(review): -1 presumably means "length not read yet" for the current block / subblock - confirm.
+    int block_length = -1;
+    int compressed_length = -1;
+    /// Compared with block_length when a block ends; a mismatch is reported as INVALID_INPUT.
+    int total_uncompressed_length = 0;
+};
+
+/// HadoopSnappyReadBuffer implements read buffer for data compressed with hadoop-snappy format.
+/// It owns the underlying compressed buffer and a HadoopSnappyDecoder that does the decoding.
+class HadoopSnappyReadBuffer : public BufferWithOwnMemory<ReadBuffer>
+{
+public:
+    using Status = HadoopSnappyDecoder::Status;
+
+    /// Returns the enumerator's name as text; used to build the decode error messages.
+    inline static String statusToString(Status status)
+    {
+        switch (status)
+        {
+            case Status::OK:
+                return "OK";
+            case Status::INVALID_INPUT:
+                return "INVALID_INPUT";
+            case Status::BUFFER_TOO_SMALL:
+                return "BUFFER_TOO_SMALL";
+            case Status::NEEDS_MORE_INPUT:
+                return "NEEDS_MORE_INPUT";
+        }
+        /// Every enumerator returns above, so control never gets here for a valid Status.
+        __builtin_unreachable();
+    }
+
+    /// `in_` is the source of compressed data; the remaining parameters are passed to BufferWithOwnMemory.
+    explicit HadoopSnappyReadBuffer(
+        std::unique_ptr<ReadBuffer> in_,
+        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+        char * existing_memory = nullptr,
+        size_t alignment = 0);
+
+    ~HadoopSnappyReadBuffer() override;
+
+private:
+    bool nextImpl() override;
+
+    /// Source of compressed data and the decoder applied to it.
+    std::unique_ptr<ReadBuffer> in;
+    std::unique_ptr<HadoopSnappyDecoder> decoder;
+
+    /// Part of `in`'s buffer that has not been consumed by the decoder yet.
+    size_t in_available;
+    const char * in_data;
+
+    /// Output space handed to the decoder on each nextImpl() call.
+    size_t out_capacity;
+    char * out_data;
+
+    /// Set once a block has been decoded completely and `in` has no more data.
+    bool eof;
+};
+
+}
+#endif
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@ -1068,7 +1068,6 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
    assertChar('"', buf);
 }

-
 /// CSV, for numbers, dates: quotes are optional, no special escaping rules.
 template <typename T>
 inline void readCSVSimple(T & x, ReadBuffer & buf)
@ -1088,8 +1087,10 @@ inline void readCSVSimple(T & x, ReadBuffer & buf)
 }

 template <typename T>
-inline std::enable_if_t<is_arithmetic_v<T>, void>
-readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
+inline std::enable_if_t<is_arithmetic_v<T>, void> readCSV(T & x, ReadBuffer & buf)
+{
+    readCSVSimple(x, buf);
+}

 inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); }
 inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
--- a/src/IO/SnappyReadBuffer.cpp
+++ b/src/IO/SnappyReadBuffer.cpp
@ -0,0 +1,76 @@
+#include <Common/config.h>
+
+#if USE_SNAPPY
+#include <memory>
+#include <fcntl.h>
+#include <sys/types.h>
+
+#include <snappy.h>
+
+#include <IO/copyData.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/WriteHelpers.h>
+
+#include "SnappyReadBuffer.h"
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SNAPPY_UNCOMPRESS_FAILED;
+    extern const int SEEK_POSITION_OUT_OF_BOUND;
+}
+
+
+/// Takes ownership of the compressed source `in_`; the buffer parameters are forwarded
+/// unchanged to BufferWithOwnMemory.
+SnappyReadBuffer::SnappyReadBuffer(
+    std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment)
+    : BufferWithOwnMemory<SeekableReadBuffer>(buf_size, existing_memory, alignment)
+    , in{std::move(in_)}
+{
+}
+
+/// Decompresses the whole input in one go. The first call reads `in` to the end, runs
+/// snappy::Uncompress on the collected bytes and makes the result this buffer's contents.
+/// Every later call reports the end of data.
+/// Returns true only if there are decompressed bytes to read.
+/// Throws SNAPPY_UNCOMPRESS_FAILED if snappy rejects the input.
+bool SnappyReadBuffer::nextImpl()
+{
+    /// Both strings are empty only before the first call; afterwards everything has already been served.
+    if (!compress_buffer.empty() || !uncompress_buffer.empty())
+        return false;
+
+    WriteBufferFromString wb(compress_buffer);
+    copyData(*in, wb);
+
+    /// The length passed to snappy is wb.count(), i.e. the number of bytes written through wb.
+    if (!snappy::Uncompress(compress_buffer.data(), wb.count(), &uncompress_buffer))
+        throw Exception("snappy uncompress failed: ", ErrorCodes::SNAPPY_UNCOMPRESS_FAILED);
+
+    /// Point this buffer directly at the decompressed string instead of copying it.
+    BufferBase::set(const_cast<char *>(uncompress_buffer.data()), uncompress_buffer.size(), 0);
+
+    /// A payload that decompresses to zero bytes has nothing to read: report the end of data
+    /// rather than claiming that an empty working buffer holds new data.
+    return !uncompress_buffer.empty();
+}
+
+/// Defaulted here, in the implementation file, rather than in the header.
+SnappyReadBuffer::~SnappyReadBuffer() = default;
+
+/// Moves the read position to an offset inside the buffer.
+/// Only SEEK_SET (absolute offset) and SEEK_CUR (relative to count()) are accepted.
+/// Returns the new position; throws SEEK_POSITION_OUT_OF_BOUND for any other mode or for a
+/// target outside [0, buffer size].
+off_t SnappyReadBuffer::seek(off_t off, int whence)
+{
+    off_t target = 0;
+    switch (whence)
+    {
+        case SEEK_SET:
+            target = off;
+            break;
+        case SEEK_CUR:
+            target = count() + off;
+            break;
+        default:
+            throw Exception("Only SEEK_SET and SEEK_CUR seek modes allowed.", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
+    }
+
+    /// Make the whole internal buffer visible again before validating the target against it.
+    working_buffer = internal_buffer;
+
+    const bool out_of_bounds = target < 0 || target > off_t(working_buffer.size());
+    if (out_of_bounds)
+    {
+        String message = String("Cannot seek through buffer") + " because seek position (" + toString(target) + ") is out of bounds [0, "
+            + toString(working_buffer.size()) + "]";
+        throw Exception(message, ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
+    }
+
+    position() = working_buffer.begin() + target;
+    return target;
+}
+
+/// Reports the current position as the value of count().
+off_t SnappyReadBuffer::getPosition()
+{
+    return static_cast<off_t>(count());
+}
+
+}
+#endif
--- a/src/IO/SnappyReadBuffer.h
+++ b/src/IO/SnappyReadBuffer.h
@ -0,0 +1,35 @@
+#pragma once
+
+#include <Common/config.h>
+
+#if USE_SNAPPY
+
+#include <IO/ReadBuffer.h>
+#include <IO/SeekableReadBuffer.h>
+#include <IO/BufferWithOwnMemory.h>
+
+namespace DB
+{
+/// Read buffer for snappy-compressed data. The whole input is read and decompressed
+/// by the first nextImpl() call; seeking then happens inside the decompressed data.
+class SnappyReadBuffer : public BufferWithOwnMemory<SeekableReadBuffer>
+{
+public:
+    /// `in_` is the source of compressed data; the remaining parameters are passed to BufferWithOwnMemory.
+    explicit SnappyReadBuffer(
+        std::unique_ptr<ReadBuffer> in_,
+        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+        char * existing_memory = nullptr,
+        size_t alignment = 0);
+
+    ~SnappyReadBuffer() override;
+
+    bool nextImpl() override;
+    /// Supports SEEK_SET and SEEK_CUR only.
+    off_t seek(off_t off, int whence) override;
+    /// Returns count().
+    off_t getPosition() override;
+
+private:
+    std::unique_ptr<ReadBuffer> in;
+    /// Raw bytes collected from `in`.
+    String compress_buffer;
+    /// Decompressed data; after the first nextImpl() call the buffer points into this string.
+    String uncompress_buffer;
+};
+
+}
+#endif
--- a/src/IO/SnappyWriteBuffer.cpp
+++ b/src/IO/SnappyWriteBuffer.cpp
@ -0,0 +1,92 @@
+#include <Common/config.h>
+
+#if USE_SNAPPY
+#include <string.h>
+
+#include <snappy.h>
+
+#include <Common/ErrorCodes.h>
+#include "SnappyWriteBuffer.h"
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SNAPPY_COMPRESS_FAILED;
+}
+
+/// Takes ownership of the destination `out_`; the buffer parameters are forwarded
+/// unchanged to BufferWithOwnMemory.
+SnappyWriteBuffer::SnappyWriteBuffer(
+    std::unique_ptr<WriteBuffer> out_, size_t buf_size, char * existing_memory, size_t alignment)
+    : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
+    , out{std::move(out_)}
+{
+}
+
+/// Compresses and flushes the collected data if that has not been done yet
+/// (finish() returns immediately once it has run).
+SnappyWriteBuffer::~SnappyWriteBuffer()
+{
+    try
+    {
+        finish();
+    }
+    catch (...)
+    {
+        /// finish() rethrows compression and write errors, but an exception leaving a destructor
+        /// calls std::terminate, so the error can only be logged here.
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
+
+/// Moves the bytes written into the working buffer so far to the end of uncompress_buffer.
+/// Nothing is compressed here; that happens once, in finishImpl().
+void SnappyWriteBuffer::nextImpl()
+{
+    const size_t pending = offset();
+    if (pending == 0)
+        return;
+
+    uncompress_buffer.append(working_buffer.begin(), pending);
+}
+
+/// Compresses and writes out the collected data, then finalizes `out`. Runs at most once:
+/// after the first attempt, successful or not, further calls do nothing.
+/// Any exception from the attempt is rethrown to the caller.
+void SnappyWriteBuffer::finish()
+{
+    if (finished)
+        return;
+
+    /// Both the success path and the failure path end with the flag set, so set it once up front.
+    finished = true;
+
+    try
+    {
+        finishImpl();
+        out->finalize();
+    }
+    catch (...)
+    {
+        /// Do not try to flush next time after exception.
+        out->position() = out->buffer().begin();
+        throw;
+    }
+}
+
+/// Flushes the pending bytes into uncompress_buffer, compresses everything collected so far
+/// with snappy and writes the compressed bytes to `out`.
+/// Throws SNAPPY_COMPRESS_FAILED if snappy reports an empty result.
+void SnappyWriteBuffer::finishImpl()
+{
+    /// Pick up whatever is still sitting in the working buffer.
+    next();
+
+    /// snappy::Compress returns the size of the compressed output rather than a success flag.
+    /// A size of zero is treated as a failure, which is the condition the bool conversion expressed.
+    const size_t compressed_size = snappy::Compress(uncompress_buffer.data(), uncompress_buffer.size(), &compress_buffer);
+    if (compressed_size == 0)
+    {
+        throw Exception("snappy compress failed: ", ErrorCodes::SNAPPY_COMPRESS_FAILED);
+    }
+
+    /// WriteBuffer::write copies the data chunk by chunk, moving to the next chunk of `out`
+    /// whenever the current one is full, so no manual copy loop is needed.
+    out->write(compress_buffer.data(), compress_buffer.size());
+}
+
+}
+
+#endif
+
--- a/src/IO/SnappyWriteBuffer.h
+++ b/src/IO/SnappyWriteBuffer.h
@ -0,0 +1,41 @@
+#pragma once
+
+#include <Common/config.h>
+
+#if USE_SNAPPY
+#include <IO/BufferWithOwnMemory.h>
+#include <IO/WriteBuffer.h>
+
+namespace DB
+{
+/// Performs compression using the snappy library and writes the compressed data to the underlying buffer.
+/// Written data is only collected in memory; it is compressed and passed on in one piece by finish().
+class SnappyWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
+{
+public:
+    /// `out_` receives the compressed data; the remaining parameters are passed to BufferWithOwnMemory.
+    explicit SnappyWriteBuffer(
+        std::unique_ptr<WriteBuffer> out_,
+        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+        char * existing_memory = nullptr,
+        size_t alignment = 0);
+
+    ~SnappyWriteBuffer() override;
+
+    void finalizeImpl() override { finish(); }
+
+private:
+    /// Appends the bytes written so far to uncompress_buffer.
+    void nextImpl() override;
+
+    /// Compresses uncompress_buffer and writes the result to `out`.
+    void finishImpl();
+    /// Runs finishImpl() and finalizes `out`, at most once.
+    void finish();
+
+    std::unique_ptr<WriteBuffer> out;
+    /// Set by finish() whether or not it succeeded.
+    bool finished = false;
+
+    /// All data written to this buffer so far, and its compressed form produced by finishImpl().
+    String uncompress_buffer;
+    String compress_buffer;
+};
+
+}
+
+#endif
+
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`Subproject commit 809a77d435ce218d9b000733f19489c606fc567b`