diff --git a/.github/actionlint.yml b/.github/actionlint.yml
new file mode 100644
index 00000000000..8083186117f
--- /dev/null
+++ b/.github/actionlint.yml
@@ -0,0 +1,8 @@
+self-hosted-runner:
+  labels:
+    - builder
+    - fuzzer-unit-tester
+    - stress-tester
+    - style-checker
+    - func-tester-aarch64
+    - func-tester
diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml
index 05cfc6a9405..600cde33f90 100644
--- a/.github/workflows/backport.yml
+++ b/.github/workflows/backport.yml
@@ -33,11 +33,11 @@ jobs:
       - name: Cherry pick
         run: |
           sudo pip install GitPython
-          cd $GITHUB_WORKSPACE/tests/ci
+          cd "$GITHUB_WORKSPACE/tests/ci"
           python3 cherry_pick.py
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index a9e472ab09c..749c248af82 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -9,23 +9,68 @@ on: # yamllint disable-line rule:truthy
     branches:
       - 'backport/**'
 jobs:
-  DockerHubPush:
-    runs-on: [self-hosted, style-checker]
+  DockerHubPushAarch64:
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Images check
         run: |
-          cd $GITHUB_WORKSPACE/tests/ci
-          python3 docker_images_check.py
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+  DockerHubPushAmd64:
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix amd64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+  DockerHubPush:
+    needs: [DockerHubPushAmd64, DockerHubPushAarch64]
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Download changed aarch64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}
+      - name: Download changed amd64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
         uses: actions/upload-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/docker_images_check/changed_images.json
+          path: ${{ runner.temp }}/changed_images.json
   CompatibilityCheck:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, style-checker]
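The new DockerHubPush job above fans in the two per-architecture pushes and merges them with docker_manifests_merge.py. That script is not part of this diff; conceptually the merge is what the Docker CLI's manifest subcommands do. A minimal sketch, with an illustrative image name rather than one taken from the CI scripts:

    # Combine per-arch tags into one multi-arch manifest list and publish it.
    docker manifest create clickhouse/binary-builder:latest \
        clickhouse/binary-builder:latest-amd64 \
        clickhouse/binary-builder:latest-aarch64
    docker manifest push clickhouse/binary-builder:latest

A client pulling the merged tag then automatically receives the variant built for its own CPU architecture.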
@@ -39,7 +84,7 @@ jobs:
           EOF
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Download json reports
@@ -48,16 +93,16 @@
           path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 compatibility_check.py
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
 #########################################################################################
 #################################### ORDINARY BUILDS ####################################
 #########################################################################################
@@ -82,7 +127,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -90,10 +135,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -103,9 +148,50 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderDebAarch64:
+    needs: [DockerHubPush]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_aarch64
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ runner.temp }}/images_path
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_NAME }}
+          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebAsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
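The quoting added throughout these run: blocks is what shellcheck reports as SC2086: an unquoted expansion is word-split and glob-expanded. A small demonstration, with an invented path:

    GITHUB_WORKSPACE='/srv/runner/work dir'   # hypothetical path containing a space
    printf '<%s>\n' $GITHUB_WORKSPACE         # two arguments: </srv/runner/work> and <dir>
    printf '<%s>\n' "$GITHUB_WORKSPACE"       # one argument: </srv/runner/work dir>

Runner paths rarely contain spaces, so this is hardening rather than a bug fix, but it lets the workflows pass a strict lint.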
@@ -127,7 +213,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -135,10 +221,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -148,9 +234,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebTsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
@@ -172,7 +258,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -180,10 +266,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -193,9 +279,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebDebug:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
@@ -217,7 +303,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -225,10 +311,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -238,15 +324,16 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
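Every cleanup recipe ends its docker commands with `||:`. The colon is the shell's no-op builtin, so `cmd ||:` means "run cmd and ignore failure": `docker kill` exits non-zero when `docker ps -q` prints nothing, and without the fallback an idle runner would turn the cleanup step red. The same behavior in isolation:

    false ||:                              # overall exit status is 0
    echo "$?"                              # prints 0
    docker kill "$(docker ps -q)" ||:      # no containers running -> failure ignored

`|| true` is the more common spelling of the same idiom.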
 ############################################################################################
 ##################################### BUILD REPORTER #######################################
 ############################################################################################
   BuilderReport:
     needs:
       - BuilderDebRelease
+      - BuilderDebAarch64
       - BuilderDebAsan
       - BuilderDebTsan
       - BuilderDebDebug
@@ -265,21 +352,21 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Report Builder
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cd $GITHUB_WORKSPACE/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cd "$GITHUB_WORKSPACE/tests/ci"
           python3 build_report_check.py "$CHECK_NAME"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
 ##############################################################################################
 ########################### FUNCTIONAl STATELESS TESTS #######################################
 ##############################################################################################
@@ -302,22 +389,22 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Functional test
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
-          python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
 ##############################################################################################
 ############################ FUNCTIONAl STATEFUL TESTS #######################################
 ##############################################################################################
@@ -340,22 +427,22 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Functional test
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
-          python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
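KILL_TIMEOUT is numeric, so word splitting cannot break it apart; the reason `"$KILL_TIMEOUT"` is still worth quoting is that an unset or empty variable would otherwise vanish from the argument list instead of arriving as an empty string. A demonstration with a throwaway helper:

    count_args() { echo "$#"; }
    CHECK_NAME='Stateless tests (release, actions)'   # hypothetical value
    KILL_TIMEOUT=""
    count_args "$CHECK_NAME" $KILL_TIMEOUT     # 1 argument: the timeout silently disappears
    count_args "$CHECK_NAME" "$KILL_TIMEOUT"   # 2 arguments: the empty value is preserved

With quoting, functional_test_check.py can fail loudly on a bad value rather than being called with the wrong number of arguments.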
 ##############################################################################################
 ######################################### STRESS TESTS #######################################
 ##############################################################################################
@@ -381,22 +468,22 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Stress test
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
           python3 stress_check.py "$CHECK_NAME"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
 #############################################################################################
 ############################# INTEGRATION TESTS #############################################
 #############################################################################################
@@ -418,22 +505,22 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Integration test
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
           python3 integration_test_check.py "$CHECK_NAME"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   FinishCheck:
     needs:
       - DockerHubPush
@@ -447,10 +534,10 @@ jobs:
     steps:
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Finish label
         run: |
-          cd $GITHUB_WORKSPACE/tests/ci
+          cd "$GITHUB_WORKSPACE/tests/ci"
           python3 finish_check.py
diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
index 857cbf2c495..cb06d853219 100644
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@@ -6,7 +6,7 @@ env:
 on: # yamllint disable-line rule:truthy
   workflow_run:
-    workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]
+    workflows: ["PullRequestCI", "ReleaseCI", "DocsCheck", "BackportPR"]
     types:
       - requested
 jobs:
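cancel.yml reaches the pull-request workflow through `workflow_run:`, which matches on the target workflow's `name:` field rather than its file name, so renaming CIGithubActions to PullRequestCI only works if every such reference changes in the same commit (jepsen.yml below is the other one). A quick audit from the repository root:

    # Workflow display names, and every workflow_run reference to them:
    grep -n '^name:' .github/workflows/*.yml
    grep -rn 'workflows:' .github/workflows/

Any name that appears in the second listing but not the first is a dangling trigger.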
diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml
index 23c0840d379..633e654d656 100644
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@@ -21,31 +21,77 @@ jobs:
     steps:
       - name: Clear repository
         run: |
-          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Labels check
         run: |
-          cd $GITHUB_WORKSPACE/tests/ci
+          cd "$GITHUB_WORKSPACE/tests/ci"
           python3 run_check.py
-  DockerHubPush:
+  DockerHubPushAarch64:
+    needs: CheckLabels
+    runs-on: [self-hosted, func-tester-aarch64]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+  DockerHubPushAmd64:
     needs: CheckLabels
     runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |
-          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Images check
         run: |
-          cd $GITHUB_WORKSPACE/tests/ci
-          python3 docker_images_check.py
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix amd64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+  DockerHubPush:
+    needs: [DockerHubPushAmd64, DockerHubPushAarch64]
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Download changed aarch64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}
+      - name: Download changed amd64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
         uses: actions/upload-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/docker_images_check/changed_images.json
+          path: ${{ runner.temp }}/changed_images.json
   DocsCheck:
     needs: DockerHubPush
     runs-on: [self-hosted, func-tester]
@@ -63,17 +109,17 @@ jobs:
           path: ${{ env.TEMP_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Docs Check
         run: |
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
           python3 docs_check.py
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml
index 1b01b4d5074..624b98ba465 100644
--- a/.github/workflows/jepsen.yml
+++ b/.github/workflows/jepsen.yml
@@ -8,7 +8,7 @@ on: # yamllint disable-line rule:truthy
   schedule:
     - cron: '0 */6 * * *'
   workflow_run:
-    workflows: ["CIGithubActions"]
+    workflows: ["PullRequestCI"]
     types:
       - completed
   workflow_dispatch:
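As in the backport workflow, docker_images_check.py now receives a `--suffix` naming the architecture it runs on. The tag scheme below is inferred from the artifact names in this diff, not from the script itself, but the per-runner work is roughly:

    # On the amd64 runner (style-checker):
    docker build -t clickhouse/docs-check:latest-amd64 .
    docker push clickhouse/docs-check:latest-amd64
    # On the aarch64 runner (func-tester-aarch64):
    docker build -t clickhouse/docs-check:latest-aarch64 .
    docker push clickhouse/docs-check:latest-aarch64

Building natively on each architecture avoids emulated (qemu) docker builds, which are typically far slower.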
@@ -24,21 +24,21 @@ jobs:
           EOF
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
           fetch-depth: 0
       - name: Jepsen Test
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
           python3 keeper_jepsen_check.py
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index c2ed39224aa..3970e64f959 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -9,23 +9,68 @@ on: # yamllint disable-line rule:truthy
     branches:
       - 'master'
 jobs:
-  DockerHubPush:
-    runs-on: [self-hosted, style-checker]
+  DockerHubPushAarch64:
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Images check
         run: |
-          cd $GITHUB_WORKSPACE/tests/ci
-          python3 docker_images_check.py
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix aarch64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
+  DockerHubPushAmd64:
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_images_check.py --suffix amd64
+      - name: Upload images files to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
+  DockerHubPush:
+    needs: [DockerHubPushAmd64, DockerHubPushAarch64]
+    runs-on: [self-hosted, style-checker]
+    steps:
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Download changed aarch64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_aarch64
+          path: ${{ runner.temp }}
+      - name: Download changed amd64 images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images_amd64
+          path: ${{ runner.temp }}
+      - name: Images check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
         uses: actions/upload-artifact@v2
         with:
           name: changed_images
-          path: ${{ runner.temp }}/docker_images_check/changed_images.json
+          path: ${{ runner.temp }}/changed_images.json
   StyleCheck:
     needs: DockerHubPush
     runs-on: [self-hosted, style-checker]
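master.yml gets the same rename target and the same DockerHubPush split. The .github/actionlint.yml added at the top of this diff ties into all of these files: its self-hosted-runner label list is what lets actionlint accept values like `runs-on: [self-hosted, func-tester-aarch64]` instead of flagging them as unknown runners. Assuming actionlint is installed, the whole directory can be checked locally:

    # actionlint picks up .github/actionlint.yml automatically and, if shellcheck
    # is on PATH, also lints the run: blocks (catching unquoted expansions like those above).
    actionlint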
@@ -42,19 +87,19 @@ jobs:
           path: ${{ env.TEMP_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Style Check
         run: |
-          cd $GITHUB_WORKSPACE/tests/ci
+          cd "$GITHUB_WORKSPACE/tests/ci"
           python3 style_check.py
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   CompatibilityCheck:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, style-checker]
@@ -68,7 +113,7 @@ jobs:
           EOF
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Download json reports
@@ -77,16 +122,16 @@
           path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 compatibility_check.py
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   SplitBuildSmokeTest:
     needs: [BuilderDebSplitted]
     if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
@@ -101,7 +146,7 @@ jobs:
           EOF
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Download json reports
@@ -110,16 +155,16 @@
           path: ${{ env.REPORTS_PATH }}
       - name: Split build check
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 split_build_smoke_check.py
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 split_build_smoke_check.py
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
 #########################################################################################
 #################################### ORDINARY BUILDS ####################################
 #########################################################################################
@@ -144,7 +189,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -152,10 +197,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -165,9 +210,50 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
+  BuilderDebAarch64:
+    needs: [DockerHubPush]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=package_aarch64
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ runner.temp }}/images_path
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_NAME }}
+          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderPerformance:
     needs: DockerHubPush
     runs-on: [self-hosted, builder]
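The "Set envs" step above is the standard GitHub Actions pattern for job-wide variables: each KEY=value line appended to the file behind $GITHUB_ENV becomes an environment variable for all later steps. The quoted 'EOF' delimiter keeps the shell from expanding anything inside the block; the ${{runner.temp}} expressions are substituted earlier, by the Actions template layer. The shell mechanics can be tried outside Actions with any file:

    GITHUB_ENV=$(mktemp)        # stand-in for the file Actions provides
    cat >> "$GITHUB_ENV" << 'EOF'
    BUILD_NAME=package_aarch64
    TEMP_PATH=/tmp/build_check
    EOF
    cat "$GITHUB_ENV"           # lines land verbatim; nothing was shell-expanded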
@@ -189,7 +275,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -197,10 +283,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -210,9 +296,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinRelease:
     needs: [DockerHubPush]
     if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
@@ -235,7 +321,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -243,10 +329,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -256,9 +342,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebAsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
@@ -280,7 +366,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -288,10 +374,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -301,9 +387,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebUBsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
@@ -325,7 +411,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -333,10 +419,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -346,9 +432,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebTsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
@@ -370,7 +456,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -378,10 +464,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -391,9 +477,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebMsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
@@ -415,7 +501,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -423,10 +509,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -436,9 +522,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebDebug:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]
@@ -460,7 +546,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -468,10 +554,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -481,9 +567,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
 ##########################################################################################
 ##################################### SPECIAL BUILDS #####################################
 ##########################################################################################
@@ -509,7 +595,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -517,10 +603,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
"$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -530,9 +616,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinTidy: needs: [DockerHubPush] if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} @@ -555,7 +641,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -563,10 +649,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -576,9 +662,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwin: needs: [DockerHubPush] if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} @@ -601,7 +687,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -609,10 +695,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -622,9 +708,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinAarch64: needs: [DockerHubPush] if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} @@ -647,7 +733,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: 
         uses: actions/checkout@v2
         with:
@@ -655,10 +741,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -668,9 +754,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinFreeBSD:
     needs: [DockerHubPush]
     if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
@@ -693,7 +779,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -701,10 +787,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -714,9 +800,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
     if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
@@ -739,7 +825,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -747,10 +833,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -760,9 +846,9 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinPPC64:
     needs: [DockerHubPush]
     if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
@@ -785,7 +871,7 @@ jobs:
           path: ${{ env.IMAGES_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
         with:
@@ -793,10 +879,10 @@ jobs:
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
         uses: actions/upload-artifact@v2
@@ -806,15 +892,16 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH $CACHES_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
 ############################################################################################
 ##################################### BUILD REPORTER #######################################
 ############################################################################################
   BuilderReport:
     needs:
       - BuilderDebRelease
+      - BuilderDebAarch64
       - BuilderBinRelease
       - BuilderDebAsan
       - BuilderDebTsan
@@ -836,21 +923,21 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Report Builder
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cd $GITHUB_WORKSPACE/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cd "$GITHUB_WORKSPACE/tests/ci"
           python3 build_report_check.py "$CHECK_NAME"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   BuilderSpecialReport:
     needs:
       - BuilderBinTidy
@@ -875,21 +962,21 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Report Builder
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cd $GITHUB_WORKSPACE/tests/ci
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cd "$GITHUB_WORKSPACE/tests/ci"
           python3 build_report_check.py "$CHECK_NAME"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
 ##############################################################################################
 ########################### FUNCTIONAl STATELESS TESTS #######################################
 ##############################################################################################
@@ -912,22 +999,22 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Functional test
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
-          python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   FunctionalStatelessTestReleaseDatabaseOrdinary:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, func-tester]
@@ -947,22 +1034,57 @@ jobs:
           path: ${{ env.REPORTS_PATH }}
       - name: Clear repository
         run: |
-          sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
         uses: actions/checkout@v2
       - name: Functional test
         run: |
-          sudo rm -fr $TEMP_PATH
-          mkdir -p $TEMP_PATH
-          cp -r $GITHUB_WORKSPACE $TEMP_PATH
-          cd $REPO_COPY/tests/ci
-          python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
       - name: Cleanup
         if: always()
         run: |
-          docker kill $(docker ps -q) ||:
-          docker rm -f $(docker ps -a -q) ||:
-          sudo rm -fr $TEMP_PATH
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
+  FunctionalStatelessTestAarch64:
+    needs: [BuilderDebAarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/stateless_release
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Stateless tests (aarch64, actions)
+          REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
+          KILL_TIMEOUT=10800
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Functional test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   FunctionalStatelessTestAsan0:
     needs: [BuilderDebAsan]
     runs-on: [self-hosted, func-tester]
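FunctionalStatelessTestAarch64 differs from the release variant only in its `needs:` (the aarch64 package) and its `runs-on:` labels, which route it to an arm64 machine; the test driver is unchanged. A defensive check a job like this could run to confirm the routing (illustrative, not taken from the CI scripts):

    if [ "$(uname -m)" != "aarch64" ]; then
        echo "expected an aarch64 runner, got $(uname -m)" >&2
        exit 1
    fi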
"$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestAsan1: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1021,22 +1143,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1058,22 +1180,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan1: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1095,22 +1217,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan2: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1132,22 +1254,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr 
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] @@ -1167,22 +1289,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1204,22 +1326,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan1: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1241,22 +1363,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps 
-a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan2: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1278,22 +1400,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1315,22 +1437,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug1: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1352,22 +1474,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug2: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1389,22 +1511,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp 
-r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ############################ FUNCTIONAL STATEFUL TESTS ####################################### ############################################################################################## @@ -1427,22 +1549,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestReleaseDatabaseOrdinary: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -1462,22 +1584,57 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatefulTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateful_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateful tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse + KILL_TIMEOUT=3600 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + 
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1497,22 +1654,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1532,22 +1689,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1567,22 +1724,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] @@ -1602,22 +1759,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test 
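Note on the new FunctionalStatefulTestAarch64 job above: it seeds its per-job variables by appending to the "$GITHUB_ENV" file rather than declaring a workflow-level env map. A minimal sketch of how that heredoc behaves (the echoed path is illustrative, not taken from a real runner): the ${{ ... }} expressions are substituted by the Actions runner before the shell ever runs, and the quoted 'EOF' delimiter stops the shell itself from expanding anything, so literal KEY=value lines land in the env file and surface as environment variables in every later step of the same job.

    cat >> "$GITHUB_ENV" << 'EOF'
    TEMP_PATH=${{runner.temp}}/stateful_release
    KILL_TIMEOUT=3600
    EOF
    # any later step in the same job then sees:
    echo "$TEMP_PATH"   # e.g. /home/ubuntu/actions-runner/_work/_temp/stateful_release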
run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1637,22 +1794,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ######################################### STRESS TESTS ####################################### ############################################################################################## @@ -1674,22 +1831,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestTsan: needs: [BuilderDebTsan] # func testers have 16 cores + 128 GB memory @@ -1712,22 +1869,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr 
"$TEMP_PATH" StressTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, stress-tester] @@ -1746,22 +1903,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, stress-tester] @@ -1780,22 +1937,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, stress-tester] @@ -1814,22 +1971,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# @@ -1853,22 +2010,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() 
run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsAsan1: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -1889,22 +2046,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsAsan2: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -1925,22 +2082,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -1961,22 +2118,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan1: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -1997,22 +2154,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 
integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan2: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -2033,22 +2190,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan3: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -2069,22 +2226,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsRelease0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -2105,22 +2262,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsRelease1: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -2141,22 +2298,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp 
-r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ##################################### AST FUZZERS ############################################ ############################################################################################## @@ -2178,22 +2335,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2212,22 +2369,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestUBSan: needs: [BuilderDebUBsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2246,22 +2403,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestMSan: needs: [BuilderDebMsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2280,22 +2437,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: 
actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2314,22 +2471,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# #################################### UNIT TESTS ############################################# ############################################################################################# @@ -2351,22 +2508,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsReleaseClang: needs: [BuilderBinRelease] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2385,22 +2542,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2419,22 +2576,22 @@ jobs: path: ${{ 
env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2453,22 +2610,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2487,22 +2644,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# #################################### PERFORMANCE TESTS ###################################### ############################################################################################# @@ -2526,22 +2683,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + 
docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" PerformanceComparison1: needs: [BuilderPerformance] runs-on: [self-hosted, stress-tester] @@ -2562,22 +2719,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" PerformanceComparison2: needs: [BuilderPerformance] runs-on: [self-hosted, stress-tester] @@ -2598,22 +2755,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" PerformanceComparison3: needs: [BuilderPerformance] runs-on: [self-hosted, stress-tester] @@ -2634,22 +2791,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FinishCheck: needs: - DockerHubPush @@ -2659,6 +2816,7 @@ jobs: - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseOrdinary + - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 - FunctionalStatelessTestTsan0 @@ -2671,6 +2829,7 @@ jobs: - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease - FunctionalStatefulTestReleaseDatabaseOrdinary + - FunctionalStatefulTestAarch64 - FunctionalStatefulTestAsan - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan @@ -2709,10 +2868,10 @@ jobs: steps: - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE 
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Finish label run: | - cd $GITHUB_WORKSPACE/tests/ci + cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py diff --git a/.github/workflows/main.yml b/.github/workflows/pull_request.yml similarity index 69% rename from .github/workflows/main.yml rename to .github/workflows/pull_request.yml index c42513ff9a8..0bd02de48d0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/pull_request.yml @@ -1,4 +1,4 @@ -name: CIGithubActions +name: PullRequestCI env: # Force the stdout and stderr streams to be unbuffered @@ -24,31 +24,77 @@ jobs: steps: - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Labels check run: | - cd $GITHUB_WORKSPACE/tests/ci + cd "$GITHUB_WORKSPACE/tests/ci" python3 run_check.py - DockerHubPush: + DockerHubPushAarch64: + needs: CheckLabels + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Images check run: | - cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json StyleCheck: needs: DockerHubPush runs-on: [self-hosted, style-checker] @@ -65,19 +111,19 @@ jobs: path: ${{ env.TEMP_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Style Check run: | - cd 
$GITHUB_WORKSPACE/tests/ci + cd "$GITHUB_WORKSPACE/tests/ci" python3 style_check.py - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FastTest: needs: DockerHubPush runs-on: [self-hosted, builder] @@ -91,21 +137,21 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fast Test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 fast_test_check.py + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 fast_test_check.py - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" PVSCheck: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, func-tester] @@ -118,23 +164,23 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: submodules: 'true' - name: PVS Check run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 pvs_check.py + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 pvs_check.py - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" CompatibilityCheck: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] @@ -148,7 +194,7 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Download json reports @@ -157,16 +203,16 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 compatibility_check.py + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" SplitBuildSmokeTest: needs: [BuilderDebSplitted] runs-on: [self-hosted, style-checker] @@ -180,7 +226,7 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Download json reports @@ -189,16 +235,16 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Split build check run: | - sudo rm -fr $TEMP_PATH - 
mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 split_build_smoke_check.py + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 split_build_smoke_check.py - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ######################################################################################### #################################### ORDINARY BUILDS #################################### ######################################################################################### @@ -223,7 +269,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -231,10 +277,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -244,9 +290,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" BuilderPerformance: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -268,7 +314,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -276,10 +322,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -289,9 +335,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -313,7 +359,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -321,10 +367,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: 
| - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -334,9 +380,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAarch64: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -363,10 +409,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -376,9 +422,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAsan: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -400,7 +446,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -408,10 +454,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -421,9 +467,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebUBsan: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -445,7 +491,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -453,10 +499,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp 
-r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -466,9 +512,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebTsan: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -490,7 +536,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -498,10 +544,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -511,9 +557,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebMsan: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -535,7 +581,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -543,10 +589,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -556,9 +602,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebDebug: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -580,7 +626,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -588,10 +634,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + 
sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -601,9 +647,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ########################################################################################## ##################################### SPECIAL BUILDS ##################################### ########################################################################################## @@ -628,7 +674,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -636,10 +682,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -649,9 +695,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinTidy: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -673,7 +719,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -681,10 +727,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -694,9 +740,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwin: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -718,7 +764,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -726,10 +772,10 @@ jobs: 
fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -739,9 +785,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinAarch64: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -763,7 +809,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -771,10 +817,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -784,9 +830,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinFreeBSD: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -808,7 +854,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -816,10 +862,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -829,9 +875,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwinAarch64: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -853,7 +899,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out 
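# Why the builder jobs pass 'fetch-depth: 0' to actions/checkout: the default
# checkout is a shallow single-commit clone, and the build step reads
# contributor history from git. A generic git recipe (not part of this patch)
# to verify and repair a shallow clone:
git rev-parse --is-shallow-repository  # prints 'true' for a default checkout
git fetch --unshallow                  # fetch the full history
git shortlog -sn | head                # contributor info is now available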
repository code uses: actions/checkout@v2 with: @@ -861,10 +907,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -874,9 +920,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinPPC64: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -898,7 +944,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -906,10 +952,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -919,17 +965,17 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: needs: - BuilderDebRelease - - BuilderBinRelease - BuilderDebAarch64 + - BuilderBinRelease - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -951,21 +997,21 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Report Builder run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cd $GITHUB_WORKSPACE/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cd "$GITHUB_WORKSPACE/tests/ci" python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" BuilderSpecialReport: needs: - BuilderDebSplitted @@ -991,21 +1037,21 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: 
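# The riskiest member of the same bug class: in the old unquoted
# 'sudo rm -fr $GITHUB_WORKSPACE', a path containing spaces would be split
# and each piece removed as a separate operand. Quoting makes it a single
# operand; a stricter guard (shown as an option, not used in this patch)
# additionally aborts when the variable is unset or empty:
sudo rm -fr "${GITHUB_WORKSPACE:?is unset}" && mkdir "$GITHUB_WORKSPACE"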
Check out repository code uses: actions/checkout@v2 - name: Report Builder run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cd $GITHUB_WORKSPACE/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cd "$GITHUB_WORKSPACE/tests/ci" python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ########################### FUNCTIONAl STATELESS TESTS ####################################### ############################################################################################## @@ -1028,22 +1074,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestReleaseDatabaseReplicated0: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -1065,22 +1111,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestReleaseDatabaseReplicated1: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -1102,22 +1148,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker 
ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestReleaseWideParts: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -1137,22 +1183,57 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1174,22 +1255,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestAsan1: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1211,22 +1292,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: 
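# The 'Set envs' pattern used by the new aarch64 jobs, as a runnable sketch
# (mktemp stands in for the file the Actions runner provides): ${{ ... }}
# expressions are expanded by the runner before bash sees the script, and the
# quoted 'EOF' delimiter stops bash from expanding anything itself, so the
# values land verbatim. Each KEY=value line becomes an environment variable
# for all later steps of the job.
GITHUB_ENV=$(mktemp)
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=Stateless tests (aarch64, actions)
KILL_TIMEOUT=10800
EOF
cat "$GITHUB_ENV"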
actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1248,22 +1329,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan1: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1285,22 +1366,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan2: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1322,22 +1403,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f 
"$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] @@ -1357,22 +1438,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1394,22 +1475,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan1: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1431,22 +1512,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan2: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1468,22 +1549,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py 
"$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1505,22 +1586,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug1: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1542,22 +1623,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug2: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1579,22 +1660,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestFlakyCheck: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1614,22 +1695,22 @@ jobs: path: ${{ 
env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ############################ FUNCTIONAl STATEFUL TESTS ####################################### ############################################################################################## @@ -1652,22 +1733,57 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatefulTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateful_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateful tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse + KILL_TIMEOUT=3600 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1687,22 +1803,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd 
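# KILL_TIMEOUT is a plain positional argument in seconds for
# functional_test_check.py; the values given to the new aarch64 jobs mirror
# the existing x86 ones (a reading of this diff, not new policy):
echo "stateless: $((10800 / 3600)) h"  # 3 hours
echo "stateful:  $((3600 / 3600)) h"   # 1 hour
# Quoting "$KILL_TIMEOUT" changes nothing for a purely numeric value, but it
# keeps every expansion in the workflow uniformly safe.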
$REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -1722,22 +1838,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -1757,22 +1873,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] @@ -1792,22 +1908,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ 
-1827,22 +1943,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ######################################### STRESS TESTS ####################################### ############################################################################################## @@ -1864,22 +1980,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestTsan: needs: [BuilderDebTsan] # func testers have 16 cores + 128 GB memory @@ -1902,22 +2018,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, stress-tester] @@ -1936,22 +2052,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - 
docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, stress-tester] @@ -1970,22 +2086,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, stress-tester] @@ -2004,22 +2120,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ##################################### AST FUZZERS ############################################ ############################################################################################## @@ -2041,22 +2157,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2075,22 +2191,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm 
-fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestUBSan: needs: [BuilderDebUBsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2109,22 +2225,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestMSan: needs: [BuilderDebMsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2143,22 +2259,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ASTFuzzerTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2177,22 +2293,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Fuzzer run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 ast_fuzzer_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# @@ -2216,22 +2332,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - 
name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsAsan1: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -2252,22 +2368,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsAsan2: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -2288,22 +2404,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -2324,22 +2440,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan1: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -2360,22 +2476,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo 
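# The numbered suffixes (IntegrationTestsAsan0-2, IntegrationTestsTsan0-3,
# FunctionalStatelessTestDebug0-2, ...) shard one long suite across identical
# runners. A minimal sketch of hash-modulo bucketing, assuming that scheme
# and a hypothetical tests.list file (the real split lives inside the
# *_check.py scripts, which this diff does not show):
total=3 num=1  # this runner executes bucket 1 of 3
while read -r t; do
    h=$(cksum <<< "$t" | cut -d' ' -f1)
    [ $((h % total)) -eq "$num" ] && echo "would run: $t"
done < tests.list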
rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan2: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -2396,22 +2512,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan3: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -2432,22 +2548,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsRelease0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -2468,22 +2584,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsRelease1: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -2504,22 +2620,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository 
run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsFlakyCheck: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -2538,22 +2654,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# #################################### UNIT TESTS ############################################# ############################################################################################# @@ -2575,22 +2691,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsReleaseClang: needs: [BuilderBinRelease] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2609,22 +2725,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" 
||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2643,22 +2759,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2677,22 +2793,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2711,22 +2827,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Unit test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 unit_tests_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# #################################### PERFORMANCE TESTS ###################################### ############################################################################################# @@ -2750,22 +2866,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd 
"$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" PerformanceComparison1: needs: [BuilderPerformance] runs-on: [self-hosted, stress-tester] @@ -2786,22 +2902,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" PerformanceComparison2: needs: [BuilderPerformance] runs-on: [self-hosted, stress-tester] @@ -2822,22 +2938,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" PerformanceComparison3: needs: [BuilderPerformance] runs-on: [self-hosted, stress-tester] @@ -2858,22 +2974,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Performance Comparison run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 performance_comparison_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FinishCheck: needs: - StyleCheck @@ -2888,6 +3004,7 @@ jobs: - FunctionalStatelessTestReleaseDatabaseReplicated0 - FunctionalStatelessTestReleaseDatabaseReplicated1 - FunctionalStatelessTestReleaseWideParts + - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 - FunctionalStatelessTestTsan0 @@ -2899,6 +3016,7 @@ jobs: - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease + - 
FunctionalStatefulTestAarch64 - FunctionalStatefulTestAsan - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan @@ -2939,10 +3057,10 @@ jobs: steps: - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Finish label run: | - cd $GITHUB_WORKSPACE/tests/ci + cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1212bddb4a5..54e1f27ab7c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,23 +19,68 @@ on: # yamllint disable-line rule:truthy - '.github/**' workflow_dispatch: jobs: - DockerHubPush: - runs-on: [self-hosted, style-checker] + DockerHubPushAarch64: + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Images check run: | - cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json DocsRelease: needs: DockerHubPush runs-on: [self-hosted, func-tester] @@ -53,7 +98,7 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Download changed images @@ -63,14 +108,14 @@ jobs: path: ${{ env.TEMP_PATH }} - name: Docs Release run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" 
"$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 docs_release.py - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 68a3554741d..dc60c9df3a6 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -12,23 +12,68 @@ on: # yamllint disable-line rule:truthy - '23.[1-9][1-9]' - '24.[1-9][1-9]' jobs: - DockerHubPush: - runs-on: [self-hosted, style-checker] + DockerHubPushAarch64: + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Images check run: | - cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json CompatibilityCheck: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] @@ -42,7 +87,7 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Download json reports @@ -51,16 +96,16 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 compatibility_check.py + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py - name: Cleanup if: 
always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ######################################################################################### #################################### ORDINARY BUILDS #################################### ######################################################################################### @@ -85,7 +130,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -93,10 +138,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -106,9 +151,50 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderDebAarch64: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=package_aarch64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -130,7 +216,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -138,10 +224,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r 
"$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -151,9 +237,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebUBsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -175,7 +261,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -183,10 +269,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -196,9 +282,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebTsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -220,7 +306,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -228,10 +314,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -241,9 +327,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebMsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -265,7 +351,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -273,10 +359,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir 
-p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -286,9 +372,9 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebDebug: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -310,7 +396,7 @@ jobs: path: ${{ env.IMAGES_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: @@ -318,10 +404,10 @@ jobs: fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -331,15 +417,16 @@ jobs: - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH $CACHES_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: needs: - BuilderDebRelease + - BuilderDebAarch64 - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -360,21 +447,21 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Report Builder run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cd $GITHUB_WORKSPACE/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cd "$GITHUB_WORKSPACE/tests/ci" python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ########################### FUNCTIONAl STATELESS TESTS ####################################### ############################################################################################## @@ -397,22 +484,57 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd 
$REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -434,22 +556,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestAsan1: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -471,22 +593,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan0: needs: 
[BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -508,22 +630,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan1: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -545,22 +667,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestTsan2: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -582,22 +704,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] @@ -617,22 +739,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r 
"$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -654,22 +776,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan1: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -691,22 +813,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestMsan2: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -728,22 +850,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -765,22 +887,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir 
$GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug1: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -802,22 +924,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestDebug2: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -839,22 +961,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ############################ FUNCTIONAL STATEFUL TESTS ####################################### ############################################################################################## @@ -877,22 +999,57 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp 
-r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatefulTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateful_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateful tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse + KILL_TIMEOUT=3600 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -912,22 +1069,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] @@ -947,22 +1104,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] @@ -982,22 +1139,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm 
-fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] @@ -1017,22 +1174,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatefulTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] @@ -1052,22 +1209,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Functional test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ######################################### STRESS TESTS ####################################### ############################################################################################## @@ -1089,22 +1246,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" 
"$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestTsan: needs: [BuilderDebTsan] # func testers have 16 cores + 128 GB memory @@ -1127,22 +1284,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, stress-tester] @@ -1161,22 +1318,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestUBsan: needs: [BuilderDebUBsan] runs-on: [self-hosted, stress-tester] @@ -1195,22 +1352,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" StressTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, stress-tester] @@ -1229,22 +1386,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Stress test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd 
"$REPO_COPY/tests/ci" python3 stress_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# @@ -1268,22 +1425,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsAsan1: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -1304,22 +1461,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsAsan2: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -1340,22 +1497,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -1376,22 +1533,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: 
actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan1: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -1412,22 +1569,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan2: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -1448,22 +1605,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan3: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -1484,22 +1641,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsRelease0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -1520,22 +1677,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && 
mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsRelease1: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -1556,22 +1713,22 @@ jobs: path: ${{ env.REPORTS_PATH }} - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Integration test run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" python3 integration_test_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" FinishCheck: needs: - DockerHubPush @@ -1580,6 +1737,7 @@ jobs: - FunctionalStatelessTestDebug1 - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease + - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 - FunctionalStatelessTestTsan0 @@ -1591,6 +1749,7 @@ jobs: - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease + - FunctionalStatefulTestAarch64 - FunctionalStatefulTestAsan - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan @@ -1614,10 +1773,10 @@ jobs: steps: - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 - name: Finish label run: | - cd $GITHUB_WORKSPACE/tests/ci + cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py diff --git a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml index f3cd7ab6245..c773aee9c7a 100644 --- a/.github/workflows/woboq.yml +++ b/.github/workflows/woboq.yml @@ -23,20 +23,20 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 with: submodules: 'true' - name: Codebrowser run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 codebrowser_check.py + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 codebrowser_check.py - name: Cleanup if: always() run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.gitmodules b/.gitmodules index 5321712f1f1..04d32f4af40 100644 --- a/.gitmodules +++ b/.gitmodules 
@@ -247,6 +247,9 @@ [submodule "contrib/sysroot"] path = contrib/sysroot url = https://github.com/ClickHouse-Extras/sysroot.git +[submodule "contrib/hive-metastore"] + path = contrib/hive-metastore + url = https://github.com/ClickHouse-Extras/hive-metastore [submodule "contrib/azure"] path = contrib/azure url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 9027cfb117a..87860deea9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,2045 +1,179 @@ -### ClickHouse release v21.12, 2021-12-15 - -#### Backward Incompatible Change - -* *A fix for a feature that previously had unwanted behaviour.* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by the setting `stream_like_engine_allow_direct_select`. Direct select will not be allowed even if enabled by the setting, in case there is an attached materialized view. For Kafka and RabbitMQ, direct select, if allowed, will not commit messages by default. To enable commits with direct select, the user must use the storage-level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)). -* *A slight change in behaviour of a new function.* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)). -* *Setting rename.* Add custom null representation support for TSV/CSV input formats. Fix deserializing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)). -* *Further deprecation of already unused code.* This is relevant only for users of ClickHouse versions older than 20.6. A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders have been supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then the server will fail to start after the upgrade. Stop replicas with the old version to make the new version start. After that it will not be possible to downgrade to a version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)). - -#### New Feature - -* Implemented more of the ZooKeeper Four Letter Words commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). Now `clickhouse-keeper` is feature complete. -* Support for `Bool` data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)). -* Support for `PARTITION BY` in File, URL, HDFS storages and with `INSERT INTO` table function. Closes [#30273](https://github.com/ClickHouse/ClickHouse/issues/30273). [#30690](https://github.com/ClickHouse/ClickHouse/pull/30690) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Added `CONSTRAINT ... ASSUME ...` (without checking during `INSERT`). Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization. 
Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... later). Added ability to replace heavy columns with light columns if possible. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)). -* Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)). -* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)). -* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)). -* *TLDR: Major improvements of completeness and consistency of text formats.* Refactor formats `TSV`, `TSVRaw`, `CSV` and `JSONCompactEachRow`, `JSONCompactStringsEachRow`, remove code duplication, add a base interface for formats with `-WithNames` and `-WithNamesAndTypes` suffixes. Add formats `CSVWithNamesAndTypes`, `TSVRawWithNames`, `TSVRawWithNamesAndTypes`, `JSONCompactEachRowWithNames`, `JSONCompactStringsEachRowWithNames`, `RowBinaryWithNames`. Support parallel parsing for formats `TSVWithNamesAndTypes`, `TSVRaw(WithNames/WithNamesAndTypes)`, `CSVWithNamesAndTypes`, `JSONCompactEachRow(WithNames/WithNamesAndTypes)`, `JSONCompactStringsEachRow(WithNames/WithNamesAndTypes)`. Support column mapping and type checking for the `RowBinaryWithNamesAndTypes` format. Add setting `input_format_with_types_use_header`, which specifies whether we should check that the types written in a `WithNamesAndTypes` format match the table structure. Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should not control `csv_empty_as_default`). Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields). Fix Nullable input/output in `TSVRaw` format, make this format fully compatible with inserting into TSV. Fix inserting NULLs in `LowCardinality(Nullable)` when `input_format_null_as_default` is enabled (previously default values were inserted instead of actual NULLs). Fix strings deserialization in `JSONStringsEachRow`/`JSONCompactStringsEachRow` formats (strings were parsed only until the first '\n' or '\t'). Add the ability to use the `Raw` escaping rule in the Template input format. Add diagnostic info for the JSONCompactEachRow(WithNames/WithNamesAndTypes) input format. Fix a bug with parallel parsing of `-WithNames` formats when the setting `min_chunk_bytes_for_parallel_parsing` is less than the size of a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). Allow printing/parsing names and types of columns in the `CustomSeparated` input/output format. Add formats `CustomSeparatedWithNames/WithNamesAndTypes` similar to `TSVWithNames/WithNamesAndTypes`. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)). 
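The `-WithNames` / `-WithNamesAndTypes` refactoring above is easiest to see by example. A minimal sketch, assuming `clickhouse-local` from this release is on `PATH` (the header rows follow the usual CSV escaping rules):

```bash
# The *WithNamesAndTypes formats prepend a column-names row and a
# column-types row to the data rows.
clickhouse-local --query "SELECT 1 AS id, 'hello' AS msg FORMAT CSVWithNamesAndTypes"
# Expected output:
#   "id","msg"
#   "UInt8","String"
#   1,"hello"
```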
-* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)).
-* Exposes all settings of the global thread pool in the configuration file. [#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)).
-* Introduced window functions `exponentialTimeDecayedSum`, `exponentialTimeDecayedMax`, `exponentialTimeDecayedCount` and `exponentialTimeDecayedAvg` which are more effective than `exponentialMovingAverage` for bigger windows. Also more use-cases were covered. [#29799](https://github.com/ClickHouse/ClickHouse/pull/29799) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Add option to compress logs before writing them to a file using LZ4. Closes [#23860](https://github.com/ClickHouse/ClickHouse/issues/23860). [#29219](https://github.com/ClickHouse/ClickHouse/pull/29219) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Support `JOIN ON 1 = 1` that has CROSS JOIN semantics. This closes [#25578](https://github.com/ClickHouse/ClickHouse/issues/25578). [#25894](https://github.com/ClickHouse/ClickHouse/pull/25894) ([Vladimir C](https://github.com/vdimir)).
-* Add Map combinator for `Map` type. Rename old `sum-, min-, max- Map` for mapped arrays to `sum-, min-, max- MappedArrays`. [#24539](https://github.com/ClickHouse/ClickHouse/pull/24539) ([Ildus Kurbangaliev](https://github.com/ildus)).
-* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)).
-
-#### Experimental Feature
-
-* `WINDOW VIEW` to enable stream processing in ClickHouse. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)).
-* Drop support for using Ordinary databases with `MaterializedMySQL`. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)).
-* Implement the commands BACKUP and RESTORE for the Log family. This feature is under development. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)).
-
-#### Performance Improvement
-
-* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)).
-* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)).
-* Speed up count over nullable columns. [#31806](https://github.com/ClickHouse/ClickHouse/pull/31806) ([Raúl Marín](https://github.com/Algunenano)).
-* Speed up `avg` and `sumCount` aggregate functions. [#31694](https://github.com/ClickHouse/ClickHouse/pull/31694) ([Raúl Marín](https://github.com/Algunenano)).
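To illustrate the `JOIN ON 1 = 1` entry above, a join condition that is always true now behaves like a `CROSS JOIN` (illustrative query only):

```sql
-- Every row of `a` is paired with every row of `b`: 3 x 2 = 6 rows.
SELECT a.x, b.y
FROM (SELECT number AS x FROM numbers(3)) AS a
JOIN (SELECT number AS y FROM numbers(2)) AS b
    ON 1 = 1;
```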
-* Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improve performance of syncing data to block device. This closes [#31181](https://github.com/ClickHouse/ClickHouse/issues/31181). [#31229](https://github.com/ClickHouse/ClickHouse/pull/31229) ([zhanglistar](https://github.com/zhanglistar)).
-* Fix query performance issue in `LiveView` tables. Fixes [#30831](https://github.com/ClickHouse/ClickHouse/issues/30831). [#31006](https://github.com/ClickHouse/ClickHouse/pull/31006) ([vzakaznikov](https://github.com/vzakaznikov)).
-* Speed up query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)).
-* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional `rule_type` field). [#25122](https://github.com/ClickHouse/ClickHouse/pull/25122) ([Michail Safronov](https://github.com/msaf1980)).
-* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)` (i.e. identifier as the db.table instead of string) there was an excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)).
-* Optimize function `tupleElement` to read a subcolumn when the setting `optimize_functions_to_subcolumns` is enabled. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)).
-* Optimize function `mapContains` to read the subcolumn `key` when the setting `optimize_functions_to_subcolumns` is enabled. [#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)).
-* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Skip mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)).
-
-#### Improvement
-
-* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([tavplubix](https://github.com/tavplubix)).
-* Allow versioning of aggregate function states. Now we can introduce backward compatible changes in serialization format of aggregate function states. Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Support PostgreSQL style `ALTER MODIFY COLUMN` syntax. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)).
-* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)).
-* The `murmurHash3_128` and `sipHash128` functions now accept an arbitrary number of arguments. This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)).
-* Support default expression for `HDFS` storage and optimize fetching when the source is column-oriented. [#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)).
-* Improve the operation name of an opentelemetry span. [#32234](https://github.com/ClickHouse/ClickHouse/pull/32234) ([Frank Chen](https://github.com/FrankChen021)).
-* Use `Content-Type: application/x-ndjson` (http://ndjson.org/) for output format `JSONEachRow`. [#32223](https://github.com/ClickHouse/ClickHouse/pull/32223) ([Dmitriy Dorofeev](https://github.com/deem0n)).
-* Improve skipping unknown fields with quoted escaping rule in Template/CustomSeparated formats. Previously you could skip only quoted strings, now you can skip values of any type. [#32204](https://github.com/ClickHouse/ClickHouse/pull/32204) ([Kruglov Pavel](https://github.com/Avogar)).
-* Now `clickhouse-keeper` refuses to start or apply configuration changes when they contain duplicated IDs or endpoints. Fixes [#31339](https://github.com/ClickHouse/ClickHouse/issues/31339). [#32121](https://github.com/ClickHouse/ClickHouse/pull/32121) ([alesapin](https://github.com/alesapin)).
-* Set Content-Type in HTTP packets issued from URL engine. [#32113](https://github.com/ClickHouse/ClickHouse/pull/32113) ([Frank Chen](https://github.com/FrankChen021)).
-* Return Content-Type as 'application/json' for `JSONEachRow` format if `output_format_json_array_of_rows` is enabled. [#32112](https://github.com/ClickHouse/ClickHouse/pull/32112) ([Frank Chen](https://github.com/FrankChen021)).
-* Allow to parse `+` before `Float32`/`Float64` values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)).
-* Allow a user-configured `hdfs_replication` parameter for `DiskHDFS` and `StorageHDFS`. Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)).
-* Added ClickHouse `exception` and `exception_code` fields to opentelemetry span log. [#32040](https://github.com/ClickHouse/ClickHouse/pull/32040) ([Frank Chen](https://github.com/FrankChen021)).
-* Improve opentelemetry span log duration - it was zero at the query level if there was a query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)).
-* Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Recreate `system.*_log` tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)).
-* `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)).
-* ClickHouse dictionary source: support predefined connections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Allow to use predefined connections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)) (author: Amos Bird).
-* Add key bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)).
-* Improve the `max_execution_time` checks. Fixed some cases when timeout checks did not happen and the query could run for too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)).
-* Better exception message when `users.xml` cannot be loaded due to bad password hash. This closes [#24126](https://github.com/ClickHouse/ClickHouse/issues/24126). [#31557](https://github.com/ClickHouse/ClickHouse/pull/31557) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Use shard and replica name from `Replicated` database arguments when expanding macros in `ReplicatedMergeTree` arguments if these macros are not defined in config. Closes [#31471](https://github.com/ClickHouse/ClickHouse/issues/31471). [#31488](https://github.com/ClickHouse/ClickHouse/pull/31488) ([tavplubix](https://github.com/tavplubix)).
-* Better analysis for `min/max/count` projection. Now, with enabled `allow_experimental_projection_optimization`, virtual `min/max/count` projection can be used together with columns from partition key. [#31474](https://github.com/ClickHouse/ClickHouse/pull/31474) ([Amos Bird](https://github.com/amosbird)).
-* Add `--pager` support for `clickhouse-local`. [#31457](https://github.com/ClickHouse/ClickHouse/pull/31457) ([Azat Khuzhin](https://github.com/azat)).
-* Fix waiting for the editor during interactive query editing (`waitpid()` returns -1 on `SIGWINCH` and `EDITOR` and `clickhouse-local`/`clickhouse-client` work concurrently). [#31456](https://github.com/ClickHouse/ClickHouse/pull/31456) ([Azat Khuzhin](https://github.com/azat)).
-* Throw an exception if there is some garbage after a field in `JSONCompactStrings(EachRow)` format. [#31455](https://github.com/ClickHouse/ClickHouse/pull/31455) ([Kruglov Pavel](https://github.com/Avogar)).
-* Default value of `http_send_timeout` and `http_receive_timeout` settings changed from 1800 (30 minutes) to 180 (3 minutes). [#31450](https://github.com/ClickHouse/ClickHouse/pull/31450) ([tavplubix](https://github.com/tavplubix)).
-* `MaterializedMySQL` now handles `CREATE TABLE ... LIKE ...` DDL queries. [#31410](https://github.com/ClickHouse/ClickHouse/pull/31410) ([Stig Bakken](https://github.com/stigsb)).
-* Return artificial create query when executing `show create table` on system tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)).
-* Previously progress was shown only for the `numbers` table function. Now it is also shown for `numbers_mt`. [#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Initial user's roles are now used to find row policies, see [#31080](https://github.com/ClickHouse/ClickHouse/issues/31080). [#31262](https://github.com/ClickHouse/ClickHouse/pull/31262) ([Vitaly Baranov](https://github.com/vitlibar)).
-* If some obsolete setting is changed - show warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([tavplubix](https://github.com/tavplubix)).
-* Improved backoff for background cleanup tasks in `MergeTree`. Settings `merge_tree_clear_old_temporary_directories_interval_seconds` and `merge_tree_clear_old_parts_interval_seconds` moved from users settings to merge tree settings. [#31180](https://github.com/ClickHouse/ClickHouse/pull/31180) ([tavplubix](https://github.com/tavplubix)).
-* Now every replica will send to the client only incremental information about profile events counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). This makes the `--hardware_utilization` option in `clickhouse-client` usable.
-* Enable multiline editing in clickhouse-client by default. This addresses [#31121](https://github.com/ClickHouse/ClickHouse/issues/31121). [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123) ([Amos Bird](https://github.com/amosbird)).
-* Function name normalization for `ALTER` queries. This helps avoid metadata mismatch between creating a table with indices/projections and adding indices/projections via alter commands. This is a follow-up PR of https://github.com/ClickHouse/ClickHouse/pull/20174. Marked as an improvement because there are no bug reports and the scenario is somewhat rare. [#31095](https://github.com/ClickHouse/ClickHouse/pull/31095) ([Amos Bird](https://github.com/amosbird)).
-* Support `IF EXISTS` modifier for `RENAME DATABASE`/`TABLE`/`DICTIONARY` query. If this directive is used, one will not get an error if the DATABASE/TABLE/DICTIONARY to be renamed doesn't exist. [#31081](https://github.com/ClickHouse/ClickHouse/pull/31081) ([victorgao](https://github.com/kafka1991)). A sketch of the syntax is shown after this list of improvements.
-* Cancel vertical merges when partition is dropped. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/25684 and https://github.com/ClickHouse/ClickHouse/pull/30996. [#31057](https://github.com/ClickHouse/ClickHouse/pull/31057) ([Amos Bird](https://github.com/amosbird)).
-* The local session inside a ClickHouse dictionary source won't send its events to the session log anymore. This fixes a possible deadlock (tsan alert) on shutdown. Also this PR fixes flaky `test_dictionaries_dependency_xml/`. [#31013](https://github.com/ClickHouse/ClickHouse/pull/31013) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Less locking in ALTER command. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)).
-* Fix `--verbose` option in clickhouse-local interactive mode and allow logging into a file. [#30881](https://github.com/ClickHouse/ClickHouse/pull/30881) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Added `\l`, `\d`, `\c` commands in `clickhouse-client` like in MySQL and PostgreSQL. [#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)).
-* For clickhouse-local or clickhouse-client: if there is an `--interactive` option with `--query` or `--queries-file`, then first execute them as in non-interactive mode and then start interactive mode. [#30851](https://github.com/ClickHouse/ClickHouse/pull/30851) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error (if DROP fails while removing znodes from ZooKeeper). [#30826](https://github.com/ClickHouse/ClickHouse/pull/30826) ([Azat Khuzhin](https://github.com/azat)).
-* Avro format works against Kafka. Setting `output_format_avro_rows_in_file` added. [#30351](https://github.com/ClickHouse/ClickHouse/pull/30351) ([Ilya Golshtein](https://github.com/ilejn)).
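A short sketch of the `IF EXISTS` modifier for `RENAME` referenced above (hypothetical object names; the exact placement of the modifier is assumed from the entry):

```sql
-- No error is thrown if the source table or dictionary does not exist.
RENAME TABLE IF EXISTS missing_table TO new_table;
RENAME DICTIONARY IF EXISTS missing_dict TO new_dict;
```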
-* Allow to specify one or any number of PostgreSQL schemas for one `MaterializedPostgreSQL` database. Closes [#28901](https://github.com/ClickHouse/ClickHouse/issues/28901). Closes [#29324](https://github.com/ClickHouse/ClickHouse/issues/29324). [#28933](https://github.com/ClickHouse/ClickHouse/pull/28933) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Replaced the default port for clickhouse-keeper internal communication, from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). [#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)).
-* Implement function transform with Decimal arguments. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([李帅](https://github.com/loneylee)).
-* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of a bad hdfs url by adding an additional check of the hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)).
-
-#### Bug Fixes
-
-* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)).
-* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fix a bug when removing unneeded columns in a subquery. If there is an aggregation function in a query without group by, do not remove it if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)).
-* Fixed a case when a quota was reported as exceeded even though the limit was not reached. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)).
-* Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)).
-* Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when the data type of the default expression is not equal to the data type of the column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)).
-* Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([tavplubix](https://github.com/tavplubix)).
-* Server might fail to start with `Cannot attach 1 tables due to cyclic dependencies` error if a `Dictionary` table looks at an XML-dictionary with the same name; it's fixed. Fixes [#31315](https://github.com/ClickHouse/ClickHouse/issues/31315). [#32288](https://github.com/ClickHouse/ClickHouse/pull/32288) ([tavplubix](https://github.com/tavplubix)).
-* Fix parsing error while deserializing NaN for `Nullable(Float)` with the `Quoted` escaping rule. [#32190](https://github.com/ClickHouse/ClickHouse/pull/32190) ([Kruglov Pavel](https://github.com/Avogar)).
-* XML dictionaries: identifiers used in the table create query can be qualified to `default_database` during upgrade to a newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)).
-* Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([tavplubix](https://github.com/tavplubix)).
-* Dictionaries: fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix `CAST` from `Nullable` with `cast_keep_nullable` (`PARAMETER_OUT_OF_BOUND` error before for i.e. `toUInt32OrDefault(toNullable(toUInt32(1)))`). [#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)).
-* Fix CREATE TABLE of Join Storage in some obscure cases. Close [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)).
-* Fixed `Directory ... already exists and is not empty` error when detaching a part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([tavplubix](https://github.com/tavplubix)).
-* `MaterializedMySQL` (experimental feature): Fix misinterpretation of `DECIMAL` data from MySQL. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)).
-* `FileLog` (experimental feature) engine unnecessarily created a metadata directory when table creation failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)).
-* Some `GET_PART` entry might hang in the replication queue if a part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when the partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([tavplubix](https://github.com/tavplubix)).
-* Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)).
-* Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler`. Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)).
-* Fix invalid cast of Nullable type when nullable primary key is used. (Nullable primary key is a discouraged feature - please do not use). This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)).
-* Fix crash in recursive UDF in SQL. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix crash when function `dictGet` with type is used for a dictionary attribute when the type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix crash with empty result of ODBC query (with some ODBC drivers). Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix disabling query profiler (in case of `query_profiler_real_time_period_ns>0`/`query_profiler_cpu_time_period_ns>0` the query profiler could stay enabled even after the query finished). [#31740](https://github.com/ClickHouse/ClickHouse/pull/31740) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([tavplubix](https://github.com/tavplubix)).
-* Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if the specified cluster name is the name of a `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([tavplubix](https://github.com/tavplubix)).
-* Fix exception on some of the applications of the `decrypt` function on Nullable columns. This closes [#31662](https://github.com/ClickHouse/ClickHouse/issues/31662). This closes [#31426](https://github.com/ClickHouse/ClickHouse/issues/31426). [#31707](https://github.com/ClickHouse/ClickHouse/pull/31707) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed the `ngrams` function when the string contains UTF-8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)).
-* Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`; it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([tavplubix](https://github.com/tavplubix)).
-* Fixed null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
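Illustrating the `empty`/`notEmpty` fix for `UUID` arguments above (illustrative values only):

```sql
SELECT
    empty(toUUID('00000000-0000-0000-0000-000000000000')) AS zero_uuid,  -- 1: the zero UUID counts as empty
    notEmpty(generateUUIDv4()) AS random_uuid;                           -- 1 for any non-zero UUID
```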
-* `RENAME TABLE` query worked incorrectly on attempt to rename a DDL dictionary in an `Ordinary` database; it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([tavplubix](https://github.com/tavplubix)).
-* Implement `sparkbar` aggregate function as it was intended, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)).
-* Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)).
-* Disable `partial_merge_join_left_table_buffer_bytes` until the bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)).
-* Fix progress for short `INSERT SELECT` queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)).
-* Fix wrong behavior with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Remove `notLike` function from index analysis, because it was wrong. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)).
-* Fix bug in Keeper which can lead to inability to start when some coordination logs were lost and we have a fresher snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)).
-* Rewrite right distributed table in local join. Solves [#25809](https://github.com/ClickHouse/ClickHouse/issues/25809). [#31105](https://github.com/ClickHouse/ClickHouse/pull/31105) ([abel-cheng](https://github.com/abel-cheng)).
-* Fix `Merge` table with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix JSON_VALUE/JSON_QUERY with quoted identifiers. This allows to have spaces in the json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Using the `formatRow` function with non-row-oriented formats led to a segfault. Don't allow using this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix bug which broke select queries if they happened after dropping a materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Skip the `max_partition_size_to_drop` check in case of `ATTACH PARTITION ... FROM` and `MOVE PARTITION ...` [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)).
-* Fix some corner cases with `INTERSECT` and `EXCEPT` operators. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). A small sketch of these operators follows the build notes below.
-
-#### Build/Testing/Packaging Improvement
-
-* Fix incorrect filtering result on non-x86 builds. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Make ClickHouse build fully reproducible (byte identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([alexey-milovidov](https://github.com/alexey-milovidov)). Remove the filesystem path to the build directory from binaries to enable reproducible builds. This is needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Use our own CMakeLists for `zlib-ng`, `cassandra`, `mariadb-connector-c` and `xz`, `re2`, `sentry`, `gsasl`, `arrow`, `protobuf`. This is needed for [#20151](https://github.com/ClickHouse/ClickHouse/issues/20151). Part of [#9226](https://github.com/ClickHouse/ClickHouse/issues/9226). A small step towards removal of annoying trash from the build system. [#30599](https://github.com/ClickHouse/ClickHouse/pull/30599) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Hermetic builds: use a fixed version of libc and make sure that no source or binary files from the host OS are used during build. This closes [#27133](https://github.com/ClickHouse/ClickHouse/issues/27133). This closes [#21435](https://github.com/ClickHouse/ClickHouse/issues/21435). This closes [#30462](https://github.com/ClickHouse/ClickHouse/issues/30462). [#30011](https://github.com/ClickHouse/ClickHouse/pull/30011) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Adding function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227). [#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)).
-* More correct setup of capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)).
-* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)).
-* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Initial support for RISC-V. See development/build-cross-riscv for quirks and the build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)).
-* Support compiling on ARM machines with the parameter `-DENABLE_TESTS=OFF`. [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)).
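Before the 21.11 notes, a small sketch of the `INTERSECT`/`EXCEPT` operators referenced in the Bug Fixes list above (illustrative only):

```sql
-- numbers(5) yields 0..4; numbers(3, 5) yields 3..7.
SELECT number FROM numbers(5)
INTERSECT
SELECT number FROM numbers(3, 5);  -- 3, 4

SELECT number FROM numbers(5)
EXCEPT
SELECT number FROM numbers(3, 5);  -- 0, 1, 2
```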
-
-
-### ClickHouse release v21.11, 2021-11-09
-
-#### Backward Incompatible Change
-
-* Change order of json_path and json arguments in SQL/JSON functions (to be consistent with the standard). Closes [#30449](https://github.com/ClickHouse/ClickHouse/issues/30449). [#30474](https://github.com/ClickHouse/ClickHouse/pull/30474) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Remove `MergeTree` table setting `write_final_mark`. It will be always `true`. [#30455](https://github.com/ClickHouse/ClickHouse/pull/30455) ([Kseniia Sumarokova](https://github.com/kssenii)). No actions required, all tables are compatible with the new version.
-* Function `bayesAB` is removed. Please help to return this function back, refreshed. This closes [#26233](https://github.com/ClickHouse/ClickHouse/issues/26233). [#29934](https://github.com/ClickHouse/ClickHouse/pull/29934) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* This is relevant only if you already started using the experimental `clickhouse-keeper` support. Now ClickHouse Keeper snapshots are compressed with the `ZSTD` codec by default instead of custom ClickHouse LZ4 block compression. This behavior can be turned off with the `compress_snapshots_with_zstd_format` coordination setting (must be equal on all quorum replicas). Backward incompatibility is quite rare and may happen only when a new node sends a snapshot (happens in case of recovery) to an old node which is unable to read snapshots in ZSTD format. [#29417](https://github.com/ClickHouse/ClickHouse/pull/29417) ([alesapin](https://github.com/alesapin)).
-
-#### New Feature
-
-* New asynchronous INSERT mode allows to accumulate inserted data and store it in a single batch in the background. On the client it can be enabled by setting `async_insert` for `INSERT` queries with data inlined in the query or in a separate buffer (e.g. for `INSERT` queries via HTTP protocol). If `wait_for_async_insert` is true (the default), the client will wait until the data is flushed to the table. On the server side it is controlled by the settings `async_insert_threads`, `async_insert_max_data_size` and `async_insert_busy_timeout_ms`. Implements [#18282](https://github.com/ClickHouse/ClickHouse/issues/18282). [#27537](https://github.com/ClickHouse/ClickHouse/pull/27537) ([Anton Popov](https://github.com/CurtizJ)). [#20557](https://github.com/ClickHouse/ClickHouse/pull/20557) ([Ivan](https://github.com/abyss7)). Notes on performance: with asynchronous inserts you can do up to around 10 000 individual INSERT queries per second, so it is still recommended to insert in batches if you want to achieve performance of up to millions of inserted rows per second.
-* Add interactive mode for `clickhouse-local`. So, you can just run `clickhouse-local` to get a command line ClickHouse interface without connecting to a server and process data from files and external data sources. Also merge the code of `clickhouse-client` and `clickhouse-local` together. Closes [#7203](https://github.com/ClickHouse/ClickHouse/issues/7203). Closes [#25516](https://github.com/ClickHouse/ClickHouse/issues/25516). Closes [#22401](https://github.com/ClickHouse/ClickHouse/issues/22401). [#26231](https://github.com/ClickHouse/ClickHouse/pull/26231) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Added support for executable (scriptable) user defined functions. These are UDFs that can be written in any programming language. [#28803](https://github.com/ClickHouse/ClickHouse/pull/28803) ([Maksim Kita](https://github.com/kitaisreal)).
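A minimal sketch of the asynchronous insert mode described above (hypothetical table; setting names taken from the entry):

```sql
CREATE TABLE events (ts DateTime, msg String) ENGINE = MergeTree ORDER BY ts;

-- Data is accumulated on the server and flushed as one batch;
-- wait_for_async_insert = 1 makes the client wait for the flush.
INSERT INTO events
SETTINGS async_insert = 1, wait_for_async_insert = 1
VALUES (now(), 'buffered insert');
```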
-* Allow predefined connections to external data sources. This allows to avoid specifying credentials or addresses while using external data sources; they can be referenced by names instead. Closes [#28367](https://github.com/ClickHouse/ClickHouse/issues/28367). [#28577](https://github.com/ClickHouse/ClickHouse/pull/28577) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Added `INFORMATION_SCHEMA` database with `SCHEMATA`, `TABLES`, `VIEWS` and `COLUMNS` views to the corresponding tables in `system` database. Closes [#9770](https://github.com/ClickHouse/ClickHouse/issues/9770). [#28691](https://github.com/ClickHouse/ClickHouse/pull/28691) ([tavplubix](https://github.com/tavplubix)).
-* Support `EXISTS (subquery)`. Closes [#6852](https://github.com/ClickHouse/ClickHouse/issues/6852). [#29731](https://github.com/ClickHouse/ClickHouse/pull/29731) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Session logging for audit. Logging all successful and failed login and logout events to a new `system.session_log` table. [#22415](https://github.com/ClickHouse/ClickHouse/pull/22415) ([Vasily Nemkov](https://github.com/Enmk)) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Support multidimensional cosine distance and euclidean distance functions; L1, L2, Lp, Linf distances and norms. Scalar product on tuples and various arithmetic operators on tuples. This fully closes [#4509](https://github.com/ClickHouse/ClickHouse/issues/4509) and even more. [#27933](https://github.com/ClickHouse/ClickHouse/pull/27933) ([Alexey Boykov](https://github.com/mathalex)).
-* Add support for compression and decompression for `INTO OUTFILE` and `FROM INFILE` (with autodetect or with an additional optional parameter). [#27135](https://github.com/ClickHouse/ClickHouse/pull/27135) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
-* Add CORS (Cross Origin Resource Sharing) support with HTTP `OPTIONS` request. It means that now Grafana will work with serverless requests without kludges. Closes [#18693](https://github.com/ClickHouse/ClickHouse/issues/18693). [#29155](https://github.com/ClickHouse/ClickHouse/pull/29155) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
-* Queries with JOIN ON now support disjunctions (OR). [#21320](https://github.com/ClickHouse/ClickHouse/pull/21320) ([Ilya Golshtein](https://github.com/ilejn)).
-* Added function `tokens`. It allows splitting a string into tokens using non-alphanumeric ASCII characters as separators. [#29981](https://github.com/ClickHouse/ClickHouse/pull/29981) ([Maksim Kita](https://github.com/kitaisreal)). Added function `ngrams` to extract ngrams from text. Closes [#29699](https://github.com/ClickHouse/ClickHouse/issues/29699). [#29738](https://github.com/ClickHouse/ClickHouse/pull/29738) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add functions for Unicode normalization: `normalizeUTF8NFC`, `normalizeUTF8NFD`, `normalizeUTF8NFKC`, `normalizeUTF8NFKD` functions. [#28633](https://github.com/ClickHouse/ClickHouse/pull/28633) ([darkkeks](https://github.com/darkkeks)).
-* Streaming consumption of application log files in ClickHouse with `FileLog` table engine. It's like `Kafka` or `RabbitMQ` engine but for append-only and rotated logs in the local filesystem. Closes [#6953](https://github.com/ClickHouse/ClickHouse/issues/6953). [#25969](https://github.com/ClickHouse/ClickHouse/pull/25969) ([flynn](https://github.com/ucasfl)) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add `CapnProto` output format, refactor `CapnProto` input format. [#29291](https://github.com/ClickHouse/ClickHouse/pull/29291) ([Kruglov Pavel](https://github.com/Avogar)).
-* Allow to write a number in a query as a binary literal. Example `SELECT 0b001;`. [#29304](https://github.com/ClickHouse/ClickHouse/pull/29304) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added `hashed_array` dictionary type. It saves memory when using dictionaries with multiple attributes. Closes [#30236](https://github.com/ClickHouse/ClickHouse/issues/30236). [#30242](https://github.com/ClickHouse/ClickHouse/pull/30242) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added `JSONExtractKeys` function. [#30056](https://github.com/ClickHouse/ClickHouse/pull/30056) ([Vitaly](https://github.com/orloffv)).
-* Add a function `getOSKernelVersion` - it returns a string with the OS kernel version. [#29755](https://github.com/ClickHouse/ClickHouse/pull/29755) ([Memo](https://github.com/Joeywzr)).
-* Added `MD4` and `SHA384` functions. MD4 is an obsolete and insecure hash function, it can be used only in rare cases when MD4 is already being used in some legacy system and you need to get exactly the same result. [#29602](https://github.com/ClickHouse/ClickHouse/pull/29602) ([Nikita Tikhomirov](https://github.com/NSTikhomirov)).
-* HSTS can be enabled for the ClickHouse HTTP server by setting `hsts_max_age` in the configuration file with a positive number. [#29516](https://github.com/ClickHouse/ClickHouse/pull/29516) ([凌涛](https://github.com/lingtaolf)).
-* Huawei OBS Storage support. Closes [#24294](https://github.com/ClickHouse/ClickHouse/issues/24294). [#29511](https://github.com/ClickHouse/ClickHouse/pull/29511) ([kevin wan](https://github.com/MaxWk)).
-* New function `mapContainsKeyLike` to check whether a map contains a key matching a simple regular expression. [#29471](https://github.com/ClickHouse/ClickHouse/pull/29471) ([凌涛](https://github.com/lingtaolf)). New function `mapExtractKeyLike` to get a map keeping only the elements whose key matches the specified pattern. [#30793](https://github.com/ClickHouse/ClickHouse/pull/30793) ([凌涛](https://github.com/lingtaolf)).
-* Implemented `ALTER TABLE x MODIFY COMMENT`. [#29264](https://github.com/ClickHouse/ClickHouse/pull/29264) ([Vasily Nemkov](https://github.com/Enmk)).
-* Adds H3 inspection functions that are missing from ClickHouse but are available via the H3 API: https://h3geo.org/docs/api/inspection. [#29209](https://github.com/ClickHouse/ClickHouse/pull/29209) ([Bharat Nallan](https://github.com/bharatnc)).
-* Allow non-replicated ALTER TABLE FETCH and ATTACH in Replicated databases. [#29202](https://github.com/ClickHouse/ClickHouse/pull/29202) ([Kevin Michel](https://github.com/kmichel-aiven)).
-* Added a setting `output_format_csv_null_representation`: This is the same as `output_format_tsv_null_representation` but is for CSV output. [#29123](https://github.com/ClickHouse/ClickHouse/pull/29123) ([PHO](https://github.com/depressed-pho)).
-* Added function `zookeeperSessionUptime()` which returns the uptime of the current ZooKeeper session in seconds. [#28983](https://github.com/ClickHouse/ClickHouse/pull/28983) ([tavplubix](https://github.com/tavplubix)).
-* Implements the `h3ToGeoBoundary` function. [#28952](https://github.com/ClickHouse/ClickHouse/pull/28952) ([Ivan Veselov](https://github.com/fuzzERot)).
-* Add aggregate function `exponentialMovingAverage` that can be used as a window function. This closes [#27511](https://github.com/ClickHouse/ClickHouse/issues/27511). [#28914](https://github.com/ClickHouse/ClickHouse/pull/28914) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Allow to include subcolumns of table columns into `DESCRIBE` query result (can be enabled by setting `describe_include_subcolumns`). [#28905](https://github.com/ClickHouse/ClickHouse/pull/28905) ([Anton Popov](https://github.com/CurtizJ)).
-* `Executable`, `ExecutablePool` added option `send_chunk_header`. If this option is true then the chunk rows_count with a line break will be sent to the client before the chunk. [#28833](https://github.com/ClickHouse/ClickHouse/pull/28833) ([Maksim Kita](https://github.com/kitaisreal)).
-* `tokenbf_v1` and `ngram` support Map with keys of String or FixedString type. It enhances data skipping in queries with a map key filter. ```sql CREATE TABLE map_tokenbf ( row_id UInt32, map Map(String, String), INDEX map_tokenbf map TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1 ) Engine=MergeTree() ORDER BY row_id ``` With the table above, the query `select * from map_tokenbf where map['K']='V'` will skip the granules that don't contain key `'K'`. Of course, how many rows will be skipped depends on the `granularity` and `index_granularity` you set. [#28511](https://github.com/ClickHouse/ClickHouse/pull/28511) ([凌涛](https://github.com/lingtaolf)).
-* Send profile events from server to client. New packet type `ProfileEvents` was introduced. Closes [#26177](https://github.com/ClickHouse/ClickHouse/issues/26177). [#28364](https://github.com/ClickHouse/ClickHouse/pull/28364) ([Dmitry Novik](https://github.com/novikd)).
-* Bit shift operations for `FixedString` and `String` data types. This closes [#27763](https://github.com/ClickHouse/ClickHouse/issues/27763). [#28325](https://github.com/ClickHouse/ClickHouse/pull/28325) ([小路](https://github.com/nicelulu)).
-* Support adding / deleting tables for replication from PostgreSQL dynamically in the MaterializedPostgreSQL database engine. Support alter for database settings. Closes [#27573](https://github.com/ClickHouse/ClickHouse/issues/27573). [#28301](https://github.com/ClickHouse/ClickHouse/pull/28301) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Added function accurateCastOrDefault(x, T). Closes [#21330](https://github.com/ClickHouse/ClickHouse/issues/21330). Authors @taiyang-li. [#23028](https://github.com/ClickHouse/ClickHouse/pull/23028) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add Function `toUUIDOrDefault`, `toUInt8/16/32/64/256OrDefault`, `toInt8/16/32/64/128/256OrDefault`, which lets the user define a default (non-null) value when string parsing fails. [#21330](https://github.com/ClickHouse/ClickHouse/pull/21330) ([taiyang-li](https://github.com/taiyang-li)).
-
-#### Performance Improvement
-
-* Background merges can be preempted by each other and they are scheduled with appropriate priorities. Now long running merges won't prevent short merges from proceeding. This is needed for better scheduling and controlling of merges execution. It reduces the chance of getting a "too many parts" error. [#22381](https://github.com/ClickHouse/ClickHouse/issues/22381). [#25165](https://github.com/ClickHouse/ClickHouse/pull/25165) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Added an ability to execute more merges and mutations than the number of threads in the background pool. Merges and mutations will be executed step by step according to their sizes (lower is more prioritized). The ratio of the number of tasks to threads to execute is controlled by a setting `background_merges_mutations_concurrency_ratio`, 2 by default. [#29140](https://github.com/ClickHouse/ClickHouse/pull/29140) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Allow to use asynchronous reads for remote filesystems. Lower the number of seeks while reading from remote filesystems. It improves performance tremendously and makes the experimental `web` and `s3` disks work faster than EBS under certain conditions. [#29205](https://github.com/ClickHouse/ClickHouse/pull/29205) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, the `web` disk type (static dataset hosted on a web server) has graduated from experimental to production ready.
-* Queries with `INTO OUTFILE` in `clickhouse-client` will use multiple threads. Fix the issue with a flickering progress bar when using `INTO OUTFILE`. This closes [#30873](https://github.com/ClickHouse/ClickHouse/issues/30873). This closes [#30872](https://github.com/ClickHouse/ClickHouse/issues/30872). [#30886](https://github.com/ClickHouse/ClickHouse/pull/30886) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Reduce the amount of redundant compressed data read from disk for some types of `SELECT` queries (only for the `MergeTree` engines family). [#30111](https://github.com/ClickHouse/ClickHouse/pull/30111) ([alesapin](https://github.com/alesapin)).
-* Remove some redundant `seek` calls while reading compressed blocks in the MergeTree table engines family. [#29766](https://github.com/ClickHouse/ClickHouse/pull/29766) ([alesapin](https://github.com/alesapin)).
-* Make the `url` table function process multiple URLs in parallel. This closes [#29670](https://github.com/ClickHouse/ClickHouse/issues/29670) and closes [#29671](https://github.com/ClickHouse/ClickHouse/issues/29671). [#29673](https://github.com/ClickHouse/ClickHouse/pull/29673) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improve performance of aggregation in order of primary key (with enabled setting `optimize_aggregation_in_order`). [#30266](https://github.com/ClickHouse/ClickHouse/pull/30266) ([Anton Popov](https://github.com/CurtizJ)).
-* Now ClickHouse uses a DNS cache while communicating with external S3. [#29999](https://github.com/ClickHouse/ClickHouse/pull/29999) ([alesapin](https://github.com/alesapin)).
-* Add support for pushdown of `IS NULL`/`IS NOT NULL` to external databases (i.e. MySQL). [#29463](https://github.com/ClickHouse/ClickHouse/pull/29463) ([Azat Khuzhin](https://github.com/azat)). Transform `isNull`/`isNotNull` to `IS NULL`/`IS NOT NULL` (for external dbs, i.e. MySQL). [#29446](https://github.com/ClickHouse/ClickHouse/pull/29446) ([Azat Khuzhin](https://github.com/azat)).
-* SELECT queries from Dictionary tables will use multiple threads. [#30500](https://github.com/ClickHouse/ClickHouse/pull/30500) ([Maksim Kita](https://github.com/kitaisreal)).
-* Improve performance for filtering (WHERE operation) of `Decimal` columns. [#30431](https://github.com/ClickHouse/ClickHouse/pull/30431) ([Jun Jin](https://github.com/vesslanjin)).
-* Remove branchy code in the filter operation with a better implementation with popcnt/ctz, which has better performance. [#29881](https://github.com/ClickHouse/ClickHouse/pull/29881) ([Jun Jin](https://github.com/vesslanjin)).
-* Improve the filter bytemask generator function (used for the WHERE operator) all in one with SSE/AVX2/AVX512 instructions. Note that by default ClickHouse is only using SSE, so it's only relevant for custom builds. [#30014](https://github.com/ClickHouse/ClickHouse/pull/30014) ([jasperzhu](https://github.com/jinjunzh)). [#30670](https://github.com/ClickHouse/ClickHouse/pull/30670) ([jasperzhu](https://github.com/jinjunzh)).
-* Improve the performance of the SUM aggregate function of Nullable floating point numbers. [#28906](https://github.com/ClickHouse/ClickHouse/pull/28906) ([Raúl Marín](https://github.com/Algunenano)).
-* Speed up the part loading process when multiple disks are in use. The idea is similar to https://github.com/ClickHouse/ClickHouse/pull/16423. A production environment shows an improvement: 24 min -> 16 min. [#28363](https://github.com/ClickHouse/ClickHouse/pull/28363) ([Amos Bird](https://github.com/amosbird)).
-* Reduce default settings for S3 multipart upload part size to lower memory usage. [#28679](https://github.com/ClickHouse/ClickHouse/pull/28679) ([ianton-ru](https://github.com/ianton-ru)).
-* Speed up the `bitmapAnd` function. [#28332](https://github.com/ClickHouse/ClickHouse/pull/28332) ([dddounaiking](https://github.com/OodounaikingoO)).
-* Removed sub-optimal mutation notifications in `StorageMergeTree` when merges are still going. [#27552](https://github.com/ClickHouse/ClickHouse/pull/27552) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Attempt to improve performance of string comparison. [#28767](https://github.com/ClickHouse/ClickHouse/pull/28767) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Primary key index and partition filter can work with tuples. [#29281](https://github.com/ClickHouse/ClickHouse/pull/29281) ([凌涛](https://github.com/lingtaolf)).
-* If a query has multiple quantile aggregate functions with the same arguments but different level parameters, they will be fused together and executed in one pass if the setting `optimize_syntax_fuse_functions` is enabled. [#26657](https://github.com/ClickHouse/ClickHouse/pull/26657) ([hexiaoting](https://github.com/hexiaoting)).
-* Now min-max aggregation over the first expression of the primary key is optimized by projection. This is for [#329](https://github.com/ClickHouse/ClickHouse/issues/329). [#29918](https://github.com/ClickHouse/ClickHouse/pull/29918) ([Amos Bird](https://github.com/amosbird)).
-
-#### Experimental Feature
-
-* Add ability to change nodes configuration (in `.xml` file) for ClickHouse Keeper. [#30372](https://github.com/ClickHouse/ClickHouse/pull/30372) ([alesapin](https://github.com/alesapin)).
-* Add `sparkbar` aggregate function. This closes [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175). [#27481](https://github.com/ClickHouse/ClickHouse/pull/27481) ([小路](https://github.com/nicelulu)). Note: there is one flaw in this function, the behaviour will be changed in future releases.
-
-#### Improvement
-
-* Allow user to change log levels without restart. [#29586](https://github.com/ClickHouse/ClickHouse/pull/29586) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Multiple improvements for SQL UDF. Queries for manipulation of SQL User Defined Functions now support the ON CLUSTER clause. Example `CREATE FUNCTION test_function ON CLUSTER 'cluster' AS x -> x + 1;`. Closes [#30666](https://github.com/ClickHouse/ClickHouse/issues/30666). [#30734](https://github.com/ClickHouse/ClickHouse/pull/30734) ([Maksim Kita](https://github.com/kitaisreal)). Support `CREATE OR REPLACE`, `CREATE IF NOT EXISTS` syntaxes. [#30454](https://github.com/ClickHouse/ClickHouse/pull/30454) ([Maksim Kita](https://github.com/kitaisreal)). Added DROP IF EXISTS support. Example `DROP FUNCTION IF EXISTS test_function`. [#30437](https://github.com/ClickHouse/ClickHouse/pull/30437) ([Maksim Kita](https://github.com/kitaisreal)). Support lambdas. Example `CREATE FUNCTION lambda_function AS x -> arrayMap(element -> element * 2, x);`. [#30435](https://github.com/ClickHouse/ClickHouse/pull/30435) ([Maksim Kita](https://github.com/kitaisreal)). Support SQL user defined functions for `clickhouse-local`. [#30179](https://github.com/ClickHouse/ClickHouse/pull/30179) ([Maksim Kita](https://github.com/kitaisreal)). A minimal end-to-end sketch is shown below.
-* Enable per-query memory profiler (set to `memory_profiler_step` = 4MiB) globally. [#29455](https://github.com/ClickHouse/ClickHouse/pull/29455) ([Azat Khuzhin](https://github.com/azat)).
-* Added columns `data_compressed_bytes`, `data_uncompressed_bytes`, `marks_bytes` into `system.data_skipping_indices`. Added columns `secondary_indices_compressed_bytes`, `secondary_indices_uncompressed_bytes`, `secondary_indices_marks_bytes` into `system.parts`. Closes [#29697](https://github.com/ClickHouse/ClickHouse/issues/29697). [#29896](https://github.com/ClickHouse/ClickHouse/pull/29896) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add `table` alias to system.tables and `database` alias to system.databases [#29677](https://github.com/ClickHouse/ClickHouse/issues/29677). [#29882](https://github.com/ClickHouse/ClickHouse/pull/29882) ([kevin wan](https://github.com/MaxWk)).
-* Correctly resolve interdependencies between tables on server startup. Closes [#8004](https://github.com/ClickHouse/ClickHouse/issues/8004), closes [#15170](https://github.com/ClickHouse/ClickHouse/issues/15170). [#28373](https://github.com/ClickHouse/ClickHouse/pull/28373) ([tavplubix](https://github.com/tavplubix)).
-* Avoid error "Division by zero" when the denominator is Nullable in functions `divide`, `intDiv` and `modulo`. Closes [#22621](https://github.com/ClickHouse/ClickHouse/issues/22621). [#28352](https://github.com/ClickHouse/ClickHouse/pull/28352) ([Kruglov Pavel](https://github.com/Avogar)).
-* Allow to parse values of the `Date` data type in text formats as `YYYYMMDD` in addition to `YYYY-MM-DD`. This closes [#30870](https://github.com/ClickHouse/ClickHouse/issues/30870). [#30871](https://github.com/ClickHouse/ClickHouse/pull/30871) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Web UI: render bars in table cells. [#29792](https://github.com/ClickHouse/ClickHouse/pull/29792) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* User can now create dictionaries with comments: `CREATE DICTIONARY ... COMMENT 'value'`. [#29899](https://github.com/ClickHouse/ClickHouse/pull/29899) ([Vasily Nemkov](https://github.com/Enmk)). Users can now set comments on a database in the `CREATE DATABASE` statement. [#29429](https://github.com/ClickHouse/ClickHouse/pull/29429) ([Vasily Nemkov](https://github.com/Enmk)).
-* Introduce `compiled_expression_cache_elements_size` setting. If you will ever want to use this setting, you will already know what it does. [#30667](https://github.com/ClickHouse/ClickHouse/pull/30667) ([Maksim Kita](https://github.com/kitaisreal)).
-* clickhouse-format now supports the option `--query`. In previous versions you had to pass the query via stdin. [#29325](https://github.com/ClickHouse/ClickHouse/pull/29325) ([凌涛](https://github.com/lingtaolf)).
-* Support `ALTER TABLE` for tables in `Memory` databases. Memory databases are used in `clickhouse-local`. [#30866](https://github.com/ClickHouse/ClickHouse/pull/30866) ([tavplubix](https://github.com/tavplubix)).
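Putting the SQL UDF entries above together, a minimal sketch (hypothetical function name):

```sql
CREATE FUNCTION IF NOT EXISTS double_it AS x -> x * 2;
SELECT double_it(21) AS answer;   -- 42
DROP FUNCTION IF EXISTS double_it;
```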
-* Arrays of all serializable types are now supported by `arrayStringConcat`. [#30840](https://github.com/ClickHouse/ClickHouse/pull/30840) ([Nickita Taranov](https://github.com/nickitat)). -* ClickHouse now will account docker/cgroups limitations to get system memory amount. See [#25662](https://github.com/ClickHouse/ClickHouse/issues/25662). [#30574](https://github.com/ClickHouse/ClickHouse/pull/30574) ([Pavel Medvedev](https://github.com/pmed)). -* Fetched table structure for PostgreSQL database is more reliable now. [#30477](https://github.com/ClickHouse/ClickHouse/pull/30477) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Full support of positional arguments in GROUP BY and ORDER BY. [#30433](https://github.com/ClickHouse/ClickHouse/pull/30433) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow extracting non-string element as string using JSONExtractString. This is for [pull/25452#issuecomment-927123287](https://github.com/ClickHouse/ClickHouse/pull/25452#issuecomment-927123287). [#30426](https://github.com/ClickHouse/ClickHouse/pull/30426) ([Amos Bird](https://github.com/amosbird)). -* Added an ability to use FINAL clause in SELECT queries from `GraphiteMergeTree`. [#30360](https://github.com/ClickHouse/ClickHouse/pull/30360) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Minor improvements in replica cloning and enqueuing fetch for broken parts, that should avoid extremely rare hanging of `GET_PART` entries in replication queue. [#30346](https://github.com/ClickHouse/ClickHouse/pull/30346) ([tavplubix](https://github.com/tavplubix)). -* Allow symlinks to files in `user_files` directory for file table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixed comparison of `Date32` with `Date`, `DateTime`, `DateTime64` and `String`. [#30219](https://github.com/ClickHouse/ClickHouse/pull/30219) ([liang.huang](https://github.com/lhuang09287750)). -* Allow to remove `SAMPLE BY` expression from `MergeTree` tables (`ALTER TABLE REMOVE SAMPLE BY`). [#30180](https://github.com/ClickHouse/ClickHouse/pull/30180) ([Anton Popov](https://github.com/CurtizJ)). -* Now `Keeper` (as part of `clickhouse-server`) will start asynchronously if it can connect to some other node. [#30170](https://github.com/ClickHouse/ClickHouse/pull/30170) ([alesapin](https://github.com/alesapin)). -* Now `clickhouse-client` supports native multi-line editing. [#30143](https://github.com/ClickHouse/ClickHouse/pull/30143) ([Amos Bird](https://github.com/amosbird)). -* `polygon` dictionaries (reverse geocoding): added support for reading the dictionary content with SELECT query method if setting `store_polygon_key_column` = true. Closes [#30090](https://github.com/ClickHouse/ClickHouse/issues/30090). [#30142](https://github.com/ClickHouse/ClickHouse/pull/30142) ([Maksim Kita](https://github.com/kitaisreal)). -* Add ClickHouse logo to Play UI. [#29674](https://github.com/ClickHouse/ClickHouse/pull/29674) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Better exception message while reading column from Arrow-supported formats like `Arrow`, `ArrowStream`, `Parquet` and `ORC`. This closes [#29926](https://github.com/ClickHouse/ClickHouse/issues/29926). [#29927](https://github.com/ClickHouse/ClickHouse/pull/29927) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix data-race between flush and startup in `Buffer` tables. This can appear in tests. 
[#29930](https://github.com/ClickHouse/ClickHouse/pull/29930) ([Azat Khuzhin](https://github.com/azat)). -* Fix `lock-order-inversion` between `DROP TABLE` for `DatabaseMemory` and `LiveView`. Live View is an experimental feature. Memory database is used in clickhouse-local. [#29929](https://github.com/ClickHouse/ClickHouse/pull/29929) ([Azat Khuzhin](https://github.com/azat)). -* Fix lock-order-inversion between periodic dictionary reload and config reload. [#29928](https://github.com/ClickHouse/ClickHouse/pull/29928) ([Azat Khuzhin](https://github.com/azat)). -* Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add ability to configure retries and delays between them for `clickhouse-copier`. [#29921](https://github.com/ClickHouse/ClickHouse/pull/29921) ([Azat Khuzhin](https://github.com/azat)). -* Add `shutdown_wait_unfinished_queries` server setting to allowing waiting for running queries up to `shutdown_wait_unfinished` time. This is for [#24451](https://github.com/ClickHouse/ClickHouse/issues/24451). [#29914](https://github.com/ClickHouse/ClickHouse/pull/29914) ([Amos Bird](https://github.com/amosbird)). -* Add ability to trace peak memory usage (with new trace_type in `system.trace_log` - `MemoryPeak`). [#29858](https://github.com/ClickHouse/ClickHouse/pull/29858) ([Azat Khuzhin](https://github.com/azat)). -* PostgreSQL foreign tables: Added partitioned table prefix 'p' for the query for fetching replica identity index. [#29828](https://github.com/ClickHouse/ClickHouse/pull/29828) ([Shoh Jahon](https://github.com/Shohjahon)). -* Apply `max_untracked_memory`/`memory_profiler_step`/`memory_profiler_sample_probability` during mutate/merge to profile memory usage during merges. [#29681](https://github.com/ClickHouse/ClickHouse/pull/29681) ([Azat Khuzhin](https://github.com/azat)). -* Query obfuscator: `clickhouse-format --obfuscate` now works with more types of queries. [#29672](https://github.com/ClickHouse/ClickHouse/pull/29672) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fixed the issue: `clickhouse-format --obfuscate` cannot process queries with embedded dictionaries (functions `regionTo...`). [#29667](https://github.com/ClickHouse/ClickHouse/pull/29667) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect Nullable processing of JSON functions. This fixes [#29615](https://github.com/ClickHouse/ClickHouse/issues/29615) . Mark as improvement because https://github.com/ClickHouse/ClickHouse/pull/28012 is not released. [#29659](https://github.com/ClickHouse/ClickHouse/pull/29659) ([Amos Bird](https://github.com/amosbird)). -* Increase `listen_backlog` by default (to match default in newer linux kernel). [#29643](https://github.com/ClickHouse/ClickHouse/pull/29643) ([Azat Khuzhin](https://github.com/azat)). -* Reload dictionaries, models, user defined executable functions if servers config `dictionaries_config`, `models_config`, `user_defined_executable_functions_config` changes. Closes [#28142](https://github.com/ClickHouse/ClickHouse/issues/28142). [#29529](https://github.com/ClickHouse/ClickHouse/pull/29529) ([Maksim Kita](https://github.com/kitaisreal)). -* Get rid of pointless restriction on projection name. Now projection name can start with `tmp_`. [#29520](https://github.com/ClickHouse/ClickHouse/pull/29520) ([Amos Bird](https://github.com/amosbird)). 
-* Fixed `There is no query or query context has expired` error in mutations with nested subqueries. Do not allow subqueries in mutation if table is replicated and `allow_nondeterministic_mutations` setting is disabled. [#29495](https://github.com/ClickHouse/ClickHouse/pull/29495) ([tavplubix](https://github.com/tavplubix)). -* Apply config changes to `max_concurrent_queries` during runtime (no need to restart). [#29414](https://github.com/ClickHouse/ClickHouse/pull/29414) ([Raúl Marín](https://github.com/Algunenano)). -* Added setting `use_skip_indexes`. [#29405](https://github.com/ClickHouse/ClickHouse/pull/29405) ([Maksim Kita](https://github.com/kitaisreal)). -* Add support for `FREEZE`ing in-memory parts (for backups). [#29376](https://github.com/ClickHouse/ClickHouse/pull/29376) ([Mo Xuan](https://github.com/mo-avatar)). -* Pass through initial query_id for `clickhouse-benchmark` (previously if you run remote query via `clickhouse-benchmark`, queries on shards will not be linked to the initial query via `initial_query_id`). [#29364](https://github.com/ClickHouse/ClickHouse/pull/29364) ([Azat Khuzhin](https://github.com/azat)). -* Skip indexes `tokenbf_v1` and `ngrambf_v1`: added support for `Array` data type with key of `String` of `FixedString` type. [#29280](https://github.com/ClickHouse/ClickHouse/pull/29280) ([Maksim Kita](https://github.com/kitaisreal)). Skip indexes `tokenbf_v1` and `ngrambf_v1` added support for `Map` data type with key of `String` of `FixedString` type. Author @lingtaolf. [#29220](https://github.com/ClickHouse/ClickHouse/pull/29220) ([Maksim Kita](https://github.com/kitaisreal)). -* Function `has`: added support for `Map` data type. [#29267](https://github.com/ClickHouse/ClickHouse/pull/29267) ([Maksim Kita](https://github.com/kitaisreal)). -* Add `compress_logs` settings for clickhouse-keeper which allow to compress clickhouse-keeper logs (for replicated state machine) in `ZSTD` . Implements: [#26977](https://github.com/ClickHouse/ClickHouse/issues/26977). [#29223](https://github.com/ClickHouse/ClickHouse/pull/29223) ([alesapin](https://github.com/alesapin)). -* Add a setting `external_table_strict_query` - it will force passing the whole WHERE expression in queries to foreign databases even if it is incompatible. [#29206](https://github.com/ClickHouse/ClickHouse/pull/29206) ([Azat Khuzhin](https://github.com/azat)). -* Disable projections when `ARRAY JOIN` is used. In previous versions projection analysis may break aliases in array join. [#29139](https://github.com/ClickHouse/ClickHouse/pull/29139) ([Amos Bird](https://github.com/amosbird)). -* Support more types in `MsgPack` input/output format. [#29077](https://github.com/ClickHouse/ClickHouse/pull/29077) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow to input and output `LowCardinality` columns in `ORC` input/output format. [#29062](https://github.com/ClickHouse/ClickHouse/pull/29062) ([Kruglov Pavel](https://github.com/Avogar)). -* Select from `system.distributed_ddl_queue` might show incorrect values, it's fixed. [#29061](https://github.com/ClickHouse/ClickHouse/pull/29061) ([tavplubix](https://github.com/tavplubix)). -* Correct behaviour with unknown methods for HTTP connection. Solves [#29050](https://github.com/ClickHouse/ClickHouse/issues/29050). [#29057](https://github.com/ClickHouse/ClickHouse/pull/29057) ([Filatenkov Artur](https://github.com/FArthur-cmd)). 
-* `clickhouse-keeper`: Fix bug in `clickhouse-keeper-converter` which can lead to some data loss while restoring from ZooKeeper logs (not snapshot). [#29030](https://github.com/ClickHouse/ClickHouse/pull/29030) ([小路](https://github.com/nicelulu)). Fix bug in `clickhouse-keeper-converter` which can lead to incorrect ZooKeeper log deserialization. [#29071](https://github.com/ClickHouse/ClickHouse/pull/29071) ([小路](https://github.com/nicelulu)). -* Apply settings from `CREATE ... AS SELECT` queries (fixes: [#28810](https://github.com/ClickHouse/ClickHouse/issues/28810)). [#28962](https://github.com/ClickHouse/ClickHouse/pull/28962) ([Azat Khuzhin](https://github.com/azat)). -* Respect default database setting for ALTER TABLE ... ON CLUSTER ... REPLACE/MOVE PARTITION FROM/TO ... [#28955](https://github.com/ClickHouse/ClickHouse/pull/28955) ([anneji-dev](https://github.com/anneji-dev)). -* gRPC protocol: Allow change server-side compression from client. [#28953](https://github.com/ClickHouse/ClickHouse/pull/28953) ([Vitaly Baranov](https://github.com/vitlibar)). -* Skip "no data" exception when reading thermal sensors for asynchronous metrics. This closes [#28852](https://github.com/ClickHouse/ClickHouse/issues/28852). [#28882](https://github.com/ClickHouse/ClickHouse/pull/28882) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fixed logical race condition that might cause `Dictionary not found` error for existing dictionary in rare cases. [#28853](https://github.com/ClickHouse/ClickHouse/pull/28853) ([tavplubix](https://github.com/tavplubix)). -* Relax nested function for If-combinator check (but forbid nested identical combinators). [#28828](https://github.com/ClickHouse/ClickHouse/pull/28828) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible uncaught exception during server termination. [#28761](https://github.com/ClickHouse/ClickHouse/pull/28761) ([Azat Khuzhin](https://github.com/azat)). -* Forbid cleaning of tmp directories that can be used by an active mutation/merge if mutation/merge is extraordinarily long. [#28760](https://github.com/ClickHouse/ClickHouse/pull/28760) ([Azat Khuzhin](https://github.com/azat)). -* Allow optimization `optimize_arithmetic_operations_in_aggregate_functions = 1` when alias is used. [#28746](https://github.com/ClickHouse/ClickHouse/pull/28746) ([Amos Bird](https://github.com/amosbird)). -* Implement `detach_not_byte_identical_parts` setting for `ReplicatedMergeTree`, that will detach instead of remove not byte-identical parts (after mege/mutate). [#28708](https://github.com/ClickHouse/ClickHouse/pull/28708) ([Azat Khuzhin](https://github.com/azat)). -* Implement `max_suspicious_broken_parts_bytes` setting for `MergeTree` (to limit total size of all broken parts, default is `1GiB`). [#28707](https://github.com/ClickHouse/ClickHouse/pull/28707) ([Azat Khuzhin](https://github.com/azat)). -* Enable expanding macros in `RabbitMQ` table settings. [#28683](https://github.com/ClickHouse/ClickHouse/pull/28683) ([Vitaly Baranov](https://github.com/vitlibar)). -* Restore the possibility to read data of a table using the `Log` engine in multiple threads. [#28125](https://github.com/ClickHouse/ClickHouse/pull/28125) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix misbehavior of NULL column handling in JSON functions. This fixes [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930). [#28012](https://github.com/ClickHouse/ClickHouse/pull/28012) ([Amos Bird](https://github.com/amosbird)). 
-* Allow to set the size of Mark/Uncompressed cache for skip indices separately from columns. [#27961](https://github.com/ClickHouse/ClickHouse/pull/27961) ([Amos Bird](https://github.com/amosbird)). -* Allow to mix JOIN with `USING` with other JOIN types. [#23881](https://github.com/ClickHouse/ClickHouse/pull/23881) ([darkkeks](https://github.com/darkkeks)). -* Update aws-sdk submodule for throttling in Yandex Cloud S3. [#30646](https://github.com/ClickHouse/ClickHouse/pull/30646) ([ianton-ru](https://github.com/ianton-ru)). -* Fix releasing query ID and session ID at the end of query processing while handing gRPC call. [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix shutdown of `AccessControlManager` to fix flaky test. [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix failed assertion in reading from `HDFS`. Update libhdfs3 library to be able to run in tests in debug. Closes [#29251](https://github.com/ClickHouse/ClickHouse/issues/29251). Closes [#27814](https://github.com/ClickHouse/ClickHouse/issues/27814). [#29276](https://github.com/ClickHouse/ClickHouse/pull/29276) ([Kseniia Sumarokova](https://github.com/kssenii)). - - -#### Build/Testing/Packaging Improvement - -* Add support for FreeBSD builds for Aarch64 machines. [#29952](https://github.com/ClickHouse/ClickHouse/pull/29952) ([MikaelUrankar](https://github.com/MikaelUrankar)). -* Recursive submodules are no longer needed for ClickHouse. [#30315](https://github.com/ClickHouse/ClickHouse/pull/30315) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* ClickHouse can be statically built with Musl. This is added as experiment, it does not support building `odbc-bridge`, `library-bridge`, integration with CatBoost and some libraries. [#30248](https://github.com/ClickHouse/ClickHouse/pull/30248) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Enable `Protobuf`, `Arrow`, `ORC`, `Parquet` for `AArch64` and `Darwin` (macOS) builds. This closes [#29248](https://github.com/ClickHouse/ClickHouse/issues/29248). This closes [#28018](https://github.com/ClickHouse/ClickHouse/issues/28018). [#30015](https://github.com/ClickHouse/ClickHouse/pull/30015) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add cross-build for PowerPC (powerpc64le). This closes [#9589](https://github.com/ClickHouse/ClickHouse/issues/9589). Enable support for interaction with MySQL for AArch64 and PowerPC. This closes [#26301](https://github.com/ClickHouse/ClickHouse/issues/26301). [#30010](https://github.com/ClickHouse/ClickHouse/pull/30010) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Leave only required files in cross-compile toolchains. Include them as submodules (earlier they were downloaded as tarballs). [#29974](https://github.com/ClickHouse/ClickHouse/pull/29974) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Implemented structure-aware fuzzing approach in ClickHouse for select statement parser. [#30012](https://github.com/ClickHouse/ClickHouse/pull/30012) ([Paul](https://github.com/PaulCher)). -* Turning on experimental constexpr expressions evaluator for clang to speed up template code compilation. [#29668](https://github.com/ClickHouse/ClickHouse/pull/29668) ([myrrc](https://github.com/myrrc)). -* Add ability to compile using newer version fo glibc without using new symbols. 
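-A minimal sketch combining the SQL UDF entries above (hypothetical function and cluster names):
-
-```sql
-CREATE OR REPLACE FUNCTION double_all AS x -> arrayMap(e -> e * 2, x);  -- lambda body
-CREATE FUNCTION IF NOT EXISTS plus_one ON CLUSTER 'cluster' AS x -> x + 1;
-SELECT double_all([1, 2, 3]), plus_one(41);  -- [2, 4, 6], 42
-DROP FUNCTION IF EXISTS plus_one;
-```
-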
-#### Build/Testing/Packaging Improvement
-
-* Add support for FreeBSD builds for AArch64 machines. [#29952](https://github.com/ClickHouse/ClickHouse/pull/29952) ([MikaelUrankar](https://github.com/MikaelUrankar)).
-* Recursive submodules are no longer needed for ClickHouse. [#30315](https://github.com/ClickHouse/ClickHouse/pull/30315) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* ClickHouse can be statically built with Musl. This is added as an experiment; it does not support building `odbc-bridge`, `library-bridge`, integration with CatBoost and some libraries. [#30248](https://github.com/ClickHouse/ClickHouse/pull/30248) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Enable `Protobuf`, `Arrow`, `ORC`, `Parquet` for `AArch64` and `Darwin` (macOS) builds. This closes [#29248](https://github.com/ClickHouse/ClickHouse/issues/29248). This closes [#28018](https://github.com/ClickHouse/ClickHouse/issues/28018). [#30015](https://github.com/ClickHouse/ClickHouse/pull/30015) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add a cross-build for PowerPC (powerpc64le). This closes [#9589](https://github.com/ClickHouse/ClickHouse/issues/9589). Enable support for interaction with MySQL for AArch64 and PowerPC. This closes [#26301](https://github.com/ClickHouse/ClickHouse/issues/26301). [#30010](https://github.com/ClickHouse/ClickHouse/pull/30010) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Leave only required files in cross-compile toolchains. Include them as submodules (earlier they were downloaded as tarballs). [#29974](https://github.com/ClickHouse/ClickHouse/pull/29974) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Implemented a structure-aware fuzzing approach in ClickHouse for the select statement parser. [#30012](https://github.com/ClickHouse/ClickHouse/pull/30012) ([Paul](https://github.com/PaulCher)).
-* Turn on the experimental constexpr expressions evaluator for clang to speed up template code compilation. [#29668](https://github.com/ClickHouse/ClickHouse/pull/29668) ([myrrc](https://github.com/myrrc)).
-* Add the ability to compile using a newer version of glibc without using new symbols. [#29594](https://github.com/ClickHouse/ClickHouse/pull/29594) ([Azat Khuzhin](https://github.com/azat)).
-* Reduce the Debug build binary size with a clang optimization option. [#28736](https://github.com/ClickHouse/ClickHouse/pull/28736) ([flynn](https://github.com/ucasfl)).
-* Now all images for CI will be placed in a separate dockerhub repo. [#28656](https://github.com/ClickHouse/ClickHouse/pull/28656) ([alesapin](https://github.com/alesapin)).
-* Improve support for builds with clang-13. [#28046](https://github.com/ClickHouse/ClickHouse/pull/28046) ([Sergei Semin](https://github.com/syominsergey)).
-* Add the ability to print raw profile events in `clickhouse-client` (this can be useful for debugging and for testing). [#30064](https://github.com/ClickHouse/ClickHouse/pull/30064) ([Azat Khuzhin](https://github.com/azat)).
-* Add a time dependency for the clickhouse-server unit (systemd and sysvinit init). [#28891](https://github.com/ClickHouse/ClickHouse/pull/28891) ([Azat Khuzhin](https://github.com/azat)).
-* Reload the stacktrace cache when a symbol is reloaded. [#28137](https://github.com/ClickHouse/ClickHouse/pull/28137) ([Amos Bird](https://github.com/amosbird)).
-
-#### Bug Fix
-
-* Functions for case-insensitive search in UTF-8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually do not match in very rare cases; it's fixed. [#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([tavplubix](https://github.com/tavplubix)).
-* Fix reading from an empty file on an encrypted disk. [#30494](https://github.com/ClickHouse/ClickHouse/pull/30494) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fix transformation of disjunction chains to `IN` (controlled by the setting `optimize_min_equality_disjunction_chain_length`) in distributed queries with the setting `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)).
-* Allow using a materialized column as the sharding key in a distributed table even if `insert_allow_materialized_columns = 0`. [#28637](https://github.com/ClickHouse/ClickHouse/pull/28637) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in the result set (an illustrative sketch follows this list). [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix a set index not being used in AND/OR expressions when there are more than two operands. This fixes [#30416](https://github.com/ClickHouse/ClickHouse/issues/30416). [#30887](https://github.com/ClickHouse/ClickHouse/pull/30887) ([Amos Bird](https://github.com/amosbird)).
-* Fix a crash when a projection with a hashing function is materialized. This fixes [#30861](https://github.com/ClickHouse/ClickHouse/issues/30861). The issue is similar to https://github.com/ClickHouse/ClickHouse/pull/28560, which is a lack of proper understanding of the invariant of the header's emptiness. [#30877](https://github.com/ClickHouse/ClickHouse/pull/30877) ([Amos Bird](https://github.com/amosbird)).
-* Fixed ambiguity when extracting an auxiliary ZooKeeper name from a ZooKeeper path in `ReplicatedMergeTree`. Previously the server might fail to start with `Unknown auxiliary ZooKeeper name` if the ZooKeeper path contained a colon. Fixes [#29052](https://github.com/ClickHouse/ClickHouse/issues/29052). It was also allowed to specify a ZooKeeper path that does not start with a slash, but now this is deprecated and creation of new tables with such a path is not allowed. Slashes and colons in auxiliary ZooKeeper names are not allowed either. [#30822](https://github.com/ClickHouse/ClickHouse/pull/30822) ([tavplubix](https://github.com/tavplubix)).
-* Clean the temporary directory when localBackup fails for some reason. [#30797](https://github.com/ClickHouse/ClickHouse/pull/30797) ([ianton-ru](https://github.com/ianton-ru)).
-* Fixed a race condition between `REPLACE/MOVE PARTITION` and a background merge in non-replicated `MergeTree` that might cause a part of the moved/replaced data to remain in the partition. Fixes [#29327](https://github.com/ClickHouse/ClickHouse/issues/29327). [#30717](https://github.com/ClickHouse/ClickHouse/pull/30717) ([tavplubix](https://github.com/tavplubix)).
-* Fix PREWHERE with WHERE in case of an always-true PREWHERE. [#30668](https://github.com/ClickHouse/ClickHouse/pull/30668) ([Azat Khuzhin](https://github.com/azat)).
-* The limit push down optimization could cause an error `Cannot find column`. Fixes [#30438](https://github.com/ClickHouse/ClickHouse/issues/30438). [#30562](https://github.com/ClickHouse/ClickHouse/pull/30562) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Add missing parentheses for `isNotNull`/`isNull` rewrites to `IS [NOT] NULL` (fixes queries that have something like `isNotNull(1)+isNotNull(2)`). [#30520](https://github.com/ClickHouse/ClickHouse/pull/30520) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a deadlock on ALTER with a scalar subquery to the same table, close [#30461](https://github.com/ClickHouse/ClickHouse/issues/30461). [#30492](https://github.com/ClickHouse/ClickHouse/pull/30492) ([Vladimir C](https://github.com/vdimir)).
-* Fixed a segfault which might happen if the session expired during execution of REPLACE PARTITION. [#30432](https://github.com/ClickHouse/ClickHouse/pull/30432) ([tavplubix](https://github.com/tavplubix)).
-* Queries with a condition like `IN (subquery)` could return an incorrect result in case an aggregate projection was applied. Fixed creation of sets for projections. [#30310](https://github.com/ClickHouse/ClickHouse/pull/30310) ([Amos Bird](https://github.com/amosbird)).
-* Fix column alias resolution of JOIN queries when a projection is enabled. This fixes [#30146](https://github.com/ClickHouse/ClickHouse/issues/30146). [#30293](https://github.com/ClickHouse/ClickHouse/pull/30293) ([Amos Bird](https://github.com/amosbird)).
-* Fix some deficiencies in the `replaceRegexpAll` function. [#30292](https://github.com/ClickHouse/ClickHouse/pull/30292) ([Memo](https://github.com/Joeywzr)).
-* Fix ComplexKeyHashedDictionary and ComplexKeySparseHashedDictionary parsing the `preallocate` option from the layout config. [#30246](https://github.com/ClickHouse/ClickHouse/pull/30246) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix the `[I]LIKE` function. Closes [#28661](https://github.com/ClickHouse/ClickHouse/issues/28661). [#30244](https://github.com/ClickHouse/ClickHouse/pull/30244) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Fix a crash with short-circuit evaluation and LowCardinality in multiIf. [#30243](https://github.com/ClickHouse/ClickHouse/pull/30243) ([Raúl Marín](https://github.com/Algunenano)).
-* FlatDictionary, HashedDictionary: fix the bytes_allocated calculation for nullable attributes. [#30238](https://github.com/ClickHouse/ClickHouse/pull/30238) ([Maksim Kita](https://github.com/kitaisreal)).
-* Allow identifiers starting with numbers in multiple joins. [#30230](https://github.com/ClickHouse/ClickHouse/pull/30230) ([Vladimir C](https://github.com/vdimir)).
-* Fix reading from `MergeTree` with `max_read_buffer_size = 0` (when the user wants to shoot himself in the foot); it could lead to the exceptions `Can't adjust last granule`, `LOGICAL_ERROR`, or even data loss. [#30192](https://github.com/ClickHouse/ClickHouse/pull/30192) ([Azat Khuzhin](https://github.com/azat)).
-* Fix `pread_fake_async`/`pread_threadpool` with `min_bytes_to_use_direct_io`. [#30191](https://github.com/ClickHouse/ClickHouse/pull/30191) ([Azat Khuzhin](https://github.com/azat)).
-* Fix INSERT SELECT incorrectly filling a MATERIALIZED column based on a Nullable column. [#30189](https://github.com/ClickHouse/ClickHouse/pull/30189) ([Azat Khuzhin](https://github.com/azat)).
-* Support nullable arguments in the function `initializeAggregation`. [#30177](https://github.com/ClickHouse/ClickHouse/pull/30177) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix the error `Port is already connected` for queries with `GLOBAL IN` and `WITH TOTALS`. Only for 21.9 and 21.10. [#30086](https://github.com/ClickHouse/ClickHouse/pull/30086) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix a race between MOVE PARTITION and merges/mutations for MergeTree. [#30074](https://github.com/ClickHouse/ClickHouse/pull/30074) ([Azat Khuzhin](https://github.com/azat)).
-* A dropped `Memory` database might reappear after server restart; it's fixed ([#29795](https://github.com/ClickHouse/ClickHouse/issues/29795)). Also added the `force_remove_data_recursively_on_drop` setting as a workaround for the `Directory not empty` error when dropping an `Ordinary` database (because it's not possible to remove data leftovers manually in a cloud environment). [#30054](https://github.com/ClickHouse/ClickHouse/pull/30054) ([tavplubix](https://github.com/tavplubix)).
-* Fix a crash of sampling by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)).
-* Try to close issue: [#29965](https://github.com/ClickHouse/ClickHouse/issues/29965). [#29976](https://github.com/ClickHouse/ClickHouse/pull/29976) ([hexiaoting](https://github.com/hexiaoting)).
-* Fix a possible data race between `FileChecker` and `StorageLog`/`StorageStripeLog`. [#29959](https://github.com/ClickHouse/ClickHouse/pull/29959) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a data race between `LogSink::writeMarks()` and `LogSource` in `StorageLog`. [#29946](https://github.com/ClickHouse/ClickHouse/pull/29946) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a potential resource leak in the concurrent query limit of merge tree tables introduced in https://github.com/ClickHouse/ClickHouse/pull/19544. [#29879](https://github.com/ClickHouse/ClickHouse/pull/29879) ([Amos Bird](https://github.com/amosbird)).
-* Fix the system tables recreation check (it failed to detect changes in enum values). [#29857](https://github.com/ClickHouse/ClickHouse/pull/29857) ([Azat Khuzhin](https://github.com/azat)).
-* MaterializedMySQL: Fix an issue where, if the connection to MySQL was lost, only parts of a transaction could be processed. [#29837](https://github.com/ClickHouse/ClickHouse/pull/29837) ([Håvard Kvålen](https://github.com/havardk)).
-* Avoid the `Timeout exceeded: elapsed 18446744073.709553 seconds` error that might happen in extremely rare cases, presumably due to some bug in the kernel. Fixes [#29154](https://github.com/ClickHouse/ClickHouse/issues/29154). [#29811](https://github.com/ClickHouse/ClickHouse/pull/29811) ([tavplubix](https://github.com/tavplubix)).
-* Fix a bad cast in the `ATTACH TABLE ... FROM 'path'` query when a non-string literal is used instead of a path. It may lead to reading of uninitialized memory. [#29790](https://github.com/ClickHouse/ClickHouse/pull/29790) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix concurrent access to `LowCardinality` during `GROUP BY` (in combination with `Buffer` tables it may lead to troubles). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)).
-* Fix incorrect `GROUP BY` (multiple rows with the same keys in the result) in case of a distributed query when shards had mixed versions `<= 21.3` and `>= 21.4`, the `GROUP BY` key had several columns all with fixed size, and two-level aggregation was activated (see `group_by_two_level_threshold` and `group_by_two_level_threshold_bytes`). Fixes [#29580](https://github.com/ClickHouse/ClickHouse/issues/29580). [#29735](https://github.com/ClickHouse/ClickHouse/pull/29735) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fixed incorrect behaviour of the setting `materialized_postgresql_tables_list` at server restart. Found in [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#29686](https://github.com/ClickHouse/ClickHouse/pull/29686) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* A condition in the filter predicate could be lost after the push-down optimisation. [#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix JIT expression compilation with aliases and short-circuit expression evaluation. Closes [#29403](https://github.com/ClickHouse/ClickHouse/issues/29403). [#29574](https://github.com/ClickHouse/ClickHouse/pull/29574) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix a rare segfault in an `ALTER MODIFY` query when using an incorrect table identifier in a `DEFAULT` expression like `x.y.z...`. Fixes [#29184](https://github.com/ClickHouse/ClickHouse/issues/29184). [#29573](https://github.com/ClickHouse/ClickHouse/pull/29573) ([alesapin](https://github.com/alesapin)).
-* Fix a nullptr dereference for `GROUP BY WITH TOTALS HAVING` (when the column from `HAVING` wasn't selected). [#29553](https://github.com/ClickHouse/ClickHouse/pull/29553) ([Azat Khuzhin](https://github.com/azat)).
-* Avoid deadlocks when reading and writing on Join table engine tables at the same time. [#29544](https://github.com/ClickHouse/ClickHouse/pull/29544) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix a bug in the `pathStartsWith` check caused by incorrect usage of `std::mismatch`: `The behavior is undefined if the second range is shorter than the first range.`. [#29531](https://github.com/ClickHouse/ClickHouse/pull/29531) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* In the ODBC bridge, add retries for the error Invalid cursor state. It is a retriable error. Closes [#29473](https://github.com/ClickHouse/ClickHouse/issues/29473). [#29518](https://github.com/ClickHouse/ClickHouse/pull/29518) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fixed incorrect table name parsing on loading of a `Lazy` database. Fixes [#29456](https://github.com/ClickHouse/ClickHouse/issues/29456). [#29476](https://github.com/ClickHouse/ClickHouse/pull/29476) ([tavplubix](https://github.com/tavplubix)).
-* Fix a possible `Block structure mismatch` for subqueries with a pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix the logical error `Cannot capture columns` in the functions greatest/least. Closes [#29334](https://github.com/ClickHouse/ClickHouse/issues/29334). [#29454](https://github.com/ClickHouse/ClickHouse/pull/29454) ([Kruglov Pavel](https://github.com/Avogar)).
-* RocksDB table engine: fix a race condition during multiple DB opening (and bring back some tests that trigger the problem on CI). [#29393](https://github.com/ClickHouse/ClickHouse/pull/29393) ([Azat Khuzhin](https://github.com/azat)).
-* Fix replicated access storage not shutting down cleanly when misconfigured. [#29388](https://github.com/ClickHouse/ClickHouse/pull/29388) ([Kevin Michel](https://github.com/kmichel-aiven)).
-* Remove the window function `nth_value` as it is not memory-safe. This closes [#29347](https://github.com/ClickHouse/ClickHouse/issues/29347). [#29348](https://github.com/ClickHouse/ClickHouse/pull/29348) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix vertical merges of projection parts. This fixes [#29253](https://github.com/ClickHouse/ClickHouse/issues/29253). This PR also fixes several projection merge/mutation issues introduced in https://github.com/ClickHouse/ClickHouse/pull/25165. [#29337](https://github.com/ClickHouse/ClickHouse/pull/29337) ([Amos Bird](https://github.com/amosbird)).
-* Fix hanging DDL queries on a Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)).
-* Fix connection timeouts (`send_timeout`/`receive_timeout`). [#29282](https://github.com/ClickHouse/ClickHouse/pull/29282) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a possible `Table columns structure in ZooKeeper is different from local table structure` exception while recreating or creating new replicas of `ReplicatedMergeTree`, when one of the table columns has default expressions with case-insensitive functions. [#29266](https://github.com/ClickHouse/ClickHouse/pull/29266) ([Anton Popov](https://github.com/CurtizJ)).
-* Send a normal `Database doesn't exist error` (`UNKNOWN_DATABASE`) to the client (via TCP) instead of `Attempt to read after eof` (`ATTEMPT_TO_READ_AFTER_EOF`). [#29229](https://github.com/ClickHouse/ClickHouse/pull/29229) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a segfault while inserting into a column with type LowCardinality(Nullable) in the Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)).
-* Do not allow reusing previous credentials in case of an inter-server secret (before, an INSERT via Buffer/Kafka to a Distributed table with an interserver secret configured for that cluster could re-use a previously set user for that connection). [#29060](https://github.com/ClickHouse/ClickHouse/pull/29060) ([Azat Khuzhin](https://github.com/azat)).
-* Handle `any_join_distinct_right_table_keys` when joining with a dictionary, close [#29007](https://github.com/ClickHouse/ClickHouse/issues/29007). [#29014](https://github.com/ClickHouse/ClickHouse/pull/29014) ([Vladimir C](https://github.com/vdimir)).
-* Fix the "Not found column ... in block" error when joining on an alias column, close [#26980](https://github.com/ClickHouse/ClickHouse/issues/26980). [#29008](https://github.com/ClickHouse/ClickHouse/pull/29008) ([Vladimir C](https://github.com/vdimir)).
-* Fix the number of threads used in a `GLOBAL IN` subquery (it was executed in a single thread since the [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix bad optimizations of ORDER BY if it contains WITH FILL. This closes [#28908](https://github.com/ClickHouse/ClickHouse/issues/28908). This closes [#26049](https://github.com/ClickHouse/ClickHouse/issues/26049). [#28910](https://github.com/ClickHouse/ClickHouse/pull/28910) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with consts. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)).
-* Fix waiting for mutations with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)).
-* Fix queries to external databases (e.g. MySQL) with multiple columns in IN (e.g. `(k,v) IN ((1, 2))`). [#28888](https://github.com/ClickHouse/ClickHouse/pull/28888) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a bug with `LowCardinality` in short-circuit function evaluation. Closes [#28884](https://github.com/ClickHouse/ClickHouse/issues/28884). [#28887](https://github.com/ClickHouse/ClickHouse/pull/28887) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix reading of subcolumns from compact parts. [#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)).
-* Fixed a race condition between `DROP PART` and `REPLACE/MOVE PARTITION` that might cause replicas to diverge in rare cases. [#28864](https://github.com/ClickHouse/ClickHouse/pull/28864) ([tavplubix](https://github.com/tavplubix)).
-* Fix expression compilation with short-circuit evaluation. [#28821](https://github.com/ClickHouse/ClickHouse/pull/28821) ([Azat Khuzhin](https://github.com/azat)).
-* Fix an extremely rare case when ReplicatedMergeTree replicas could diverge after a hard reboot of all replicas. The error looks like `Part ... intersects (previous|next) part ...`. [#28817](https://github.com/ClickHouse/ClickHouse/pull/28817) ([alesapin](https://github.com/alesapin)).
-* Better check for connection usability, and also catch any exception in `RabbitMQ` shutdown just in case. [#28797](https://github.com/ClickHouse/ClickHouse/pull/28797) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix a benign race condition in ReplicatedMergeTreeQueue. It shouldn't be visible to users, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)).
-* Fix a possible crash for `SELECT` with a partially created aggregate projection in case of an exception. [#28700](https://github.com/ClickHouse/ClickHouse/pull/28700) ([Amos Bird](https://github.com/amosbird)).
-* Fix a coredump in the creation of distributed tables when the parameters passed in are wrong. [#28686](https://github.com/ClickHouse/ClickHouse/pull/28686) ([Zhiyong Wang](https://github.com/ljcui)).
-* Add Settings.Names and Settings.Values aliases for the system.processes table. [#28685](https://github.com/ClickHouse/ClickHouse/pull/28685) ([Vitaly](https://github.com/orloffv)).
-* Support for the S2 Geometry library: Fix the number of arguments required by the `s2RectAdd` and `s2RectContains` functions. [#28663](https://github.com/ClickHouse/ClickHouse/pull/28663) ([Bharat Nallan](https://github.com/bharatnc)).
-* Fix invalid constant type conversion when a Nullable or LowCardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)).
-* Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)).
-
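-An illustrative sketch of the `ORDER BY ... WITH FILL FROM/TO` clause referenced in the fix above:
-
-```sql
-SELECT number AS n
-FROM numbers(3)
-ORDER BY n WITH FILL FROM 1 TO 10;  -- missing values of n in [1, 10) are filled with generated rows
-```
-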
-
-### ClickHouse release v21.10, 2021-10-16
-
-#### Backward Incompatible Change
-
-* Now the following MergeTree table-level settings: `replicated_max_parallel_sends`, `replicated_max_parallel_sends_for_table`, `replicated_max_parallel_fetches`, `replicated_max_parallel_fetches_for_table` do nothing. They never worked well and were replaced with `max_replicated_fetches_network_bandwidth`, `max_replicated_sends_network_bandwidth` and `background_fetches_pool_size`. [#28404](https://github.com/ClickHouse/ClickHouse/pull/28404) ([alesapin](https://github.com/alesapin)).
-
-#### New Feature
-
-* Add a feature for creating user-defined functions (UDF) as lambda expressions. Syntax: `CREATE FUNCTION {function_name} as ({parameters}) -> {function core}`. Example: `CREATE FUNCTION plus_one as (a) -> a + 1`. Authors @Realist007. [#27796](https://github.com/ClickHouse/ClickHouse/pull/27796) ([Maksim Kita](https://github.com/kitaisreal)) [#23978](https://github.com/ClickHouse/ClickHouse/pull/23978) ([Realist007](https://github.com/Realist007)).
-* Added the `Executable` storage engine and the `executable` table function. They enable data processing with external scripts in a streaming fashion. [#28102](https://github.com/ClickHouse/ClickHouse/pull/28102) ([Maksim Kita](https://github.com/kitaisreal)) ([ruct](https://github.com/ruct)).
-* Added the `ExecutablePool` storage engine. Similar to `Executable`, but it uses a pool of long-running processes. [#28518](https://github.com/ClickHouse/ClickHouse/pull/28518) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add the `ALTER TABLE ... MATERIALIZE COLUMN` query. [#27038](https://github.com/ClickHouse/ClickHouse/pull/27038) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Support for partitioned writes into the `s3` table function. [#23051](https://github.com/ClickHouse/ClickHouse/pull/23051) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Support the `lz4` compression format (in addition to `gz`, `bz2`, `xz`, `zstd`) for data import/export. [#25310](https://github.com/ClickHouse/ClickHouse/pull/25310) ([Bharat Nallan](https://github.com/bharatnc)).
-* Allow positional arguments under the setting `enable_positional_arguments` (an illustrative sketch follows this list). Closes [#2592](https://github.com/ClickHouse/ClickHouse/issues/2592). [#27530](https://github.com/ClickHouse/ClickHouse/pull/27530) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Accept user settings related to file formats in the `SETTINGS` clause of a `CREATE` query for s3 tables. This closes [#27580](https://github.com/ClickHouse/ClickHouse/issues/27580). [#28037](https://github.com/ClickHouse/ClickHouse/pull/28037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Allow SSL connections for the `RabbitMQ` engine. [#28365](https://github.com/ClickHouse/ClickHouse/pull/28365) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add the `getServerPort` function to allow getting the server port. When the port is not used by the server, throw an exception. [#27900](https://github.com/ClickHouse/ClickHouse/pull/27900) ([Amos Bird](https://github.com/amosbird)).
-* Add conversion functions between "snowflake id" and `DateTime`, `DateTime64`. See [#27058](https://github.com/ClickHouse/ClickHouse/issues/27058). [#27704](https://github.com/ClickHouse/ClickHouse/pull/27704) ([jasine](https://github.com/jasine)).
-* Add the function `SHA512`. [#27830](https://github.com/ClickHouse/ClickHouse/pull/27830) ([zhanglistar](https://github.com/zhanglistar)).
-* Add the `log_queries_probability` setting that allows the user to write only a sample of queries to query_log. Closes [#16609](https://github.com/ClickHouse/ClickHouse/issues/16609). [#27527](https://github.com/ClickHouse/ClickHouse/pull/27527) ([Nikolay Degterinsky](https://github.com/evillique)).
-
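-A minimal sketch of the positional arguments entry above (hypothetical table and column names):
-
-```sql
-SET enable_positional_arguments = 1;
-SELECT event_date, count() AS hits
-FROM page_views
-GROUP BY 1        -- the first column of the SELECT list (event_date)
-ORDER BY 2 DESC;  -- the second column of the SELECT list (hits)
-```
-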
-#### Experimental Feature
-
-* A `web` type of disk to store readonly tables on a web server in the form of static files. See [#23982](https://github.com/ClickHouse/ClickHouse/issues/23982). [#25251](https://github.com/ClickHouse/ClickHouse/pull/25251) ([Kseniia Sumarokova](https://github.com/kssenii)). This is mostly needed to facilitate testing of operation on shared storage and for easy importing of datasets. Not recommended for use before release 21.11.
-* Added new commands `BACKUP` and `RESTORE`. [#21945](https://github.com/ClickHouse/ClickHouse/pull/21945) ([Vitaly Baranov](https://github.com/vitlibar)). This is under development and not intended to be used in the current version.
-
-#### Performance Improvement
-
-* Speed up the `sumIf` and `countIf` aggregation functions. [#28272](https://github.com/ClickHouse/ClickHouse/pull/28272) ([Raúl Marín](https://github.com/Algunenano)).
-* Create a virtual projection for `minmax` indices. Now, when `allow_experimental_projection_optimization` is enabled, queries will use the minmax index instead of reading the data when possible. [#26286](https://github.com/ClickHouse/ClickHouse/pull/26286) ([Amos Bird](https://github.com/amosbird)).
-* Introduce two checks in `sequenceMatch` and `sequenceCount` that allow an early exit when some deterministic part of the sequence pattern is missing from the events list. This change unlocks many queries that would previously fail due to reaching the operations cap, and generally speeds up the pipeline. [#27729](https://github.com/ClickHouse/ClickHouse/pull/27729) ([Jakub Kuklis](https://github.com/jkuklis)).
-* Enhance primary key analysis with always-monotonic information of binary functions, notably non-zero constant division. [#28302](https://github.com/ClickHouse/ClickHouse/pull/28302) ([Amos Bird](https://github.com/amosbird)).
-* Make the `hasAll` filter condition leverage bloom filter data-skipping indexes (an illustrative sketch follows this list). [#27984](https://github.com/ClickHouse/ClickHouse/pull/27984) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
-* Speed up data parts loading by delaying the table startup process. [#28313](https://github.com/ClickHouse/ClickHouse/pull/28313) ([Amos Bird](https://github.com/amosbird)).
-* Fixed a possible excessive number of conditions being moved from `WHERE` to `PREWHERE` (optimization controlled by the setting `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)).
-* Enable `optimize_distributed_group_by_sharding_key` by default. [#28105](https://github.com/ClickHouse/ClickHouse/pull/28105) ([Azat Khuzhin](https://github.com/azat)).
-
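-A sketch of the `hasAll` entry above (hypothetical table; assuming a `bloom_filter` data-skipping index on the array column):
-
-```sql
-CREATE TABLE events
-(
-    id UInt64,
-    tags Array(String),
-    INDEX tags_bf tags TYPE bloom_filter GRANULARITY 4  -- data-skipping index
-)
-ENGINE = MergeTree ORDER BY id;
-
-SELECT count() FROM events WHERE hasAll(tags, ['alpha', 'beta']);  -- can now skip granules via the index
-```
-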
-#### Improvement
-
-* Check the cluster name before creating a `Distributed` table; do not allow creating a table with an incorrect cluster name. Fixes [#27832](https://github.com/ClickHouse/ClickHouse/issues/27832). [#27927](https://github.com/ClickHouse/ClickHouse/pull/27927) ([tavplubix](https://github.com/tavplubix)).
-* Add the aggregate function `quantileBFloat16Weighted`, similarly to other quantile...Weighted functions. This closes [#27745](https://github.com/ClickHouse/ClickHouse/issues/27745). [#27758](https://github.com/ClickHouse/ClickHouse/pull/27758) ([Ivan Novitskiy](https://github.com/RedClusive)).
-* Allow creating dictionaries with an empty attributes list. [#27905](https://github.com/ClickHouse/ClickHouse/pull/27905) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add interactive documentation in `clickhouse-client` about how to reset the password. This is useful in the scenario when a user has installed ClickHouse, set up the password and instantly forgotten it. See [#27750](https://github.com/ClickHouse/ClickHouse/issues/27750). [#27903](https://github.com/ClickHouse/ClickHouse/pull/27903) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Support the case when the data is enclosed in an array in the `JSONAsString` input format. Closes [#25517](https://github.com/ClickHouse/ClickHouse/issues/25517). [#25633](https://github.com/ClickHouse/ClickHouse/pull/25633) ([Kruglov Pavel](https://github.com/Avogar)).
-* Add a new column `last_queue_update_exception` to the `system.replicas` table. [#26843](https://github.com/ClickHouse/ClickHouse/pull/26843) ([nvartolomei](https://github.com/nvartolomei)).
-* Support reconnections on failover for `MaterializedPostgreSQL` tables. Closes [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#28614](https://github.com/ClickHouse/ClickHouse/pull/28614) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Generate a unique server UUID on first server start. [#20089](https://github.com/ClickHouse/ClickHouse/pull/20089) ([Bharat Nallan](https://github.com/bharatnc)).
-* Introduce the `connection_wait_timeout` setting (defaults to 5 seconds; 0 - do not wait) for the `MySQL` engine. [#28474](https://github.com/ClickHouse/ClickHouse/pull/28474) ([Azat Khuzhin](https://github.com/azat)).
-* Do not allow creating `MaterializedPostgreSQL` with bad arguments. Closes [#28423](https://github.com/ClickHouse/ClickHouse/issues/28423). [#28430](https://github.com/ClickHouse/ClickHouse/pull/28430) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Use a real tmp file instead of the predefined "rows_sources" for vertical merges. This avoids generating garbage directories in tmp disks. [#28299](https://github.com/ClickHouse/ClickHouse/pull/28299) ([Amos Bird](https://github.com/amosbird)).
-* Added `libhdfs3_conf` to the server config instead of exporting the env variable `LIBHDFS3_CONF` in clickhouse-server.service. This is for configuration of interaction with HDFS. [#28268](https://github.com/ClickHouse/ClickHouse/pull/28268) ([Zhichang Yu](https://github.com/yuzhichang)).
-* Fix removal of parts in a Temporary state which could lead to an unexpected exception (`Part %name% doesn't exist`). Fixes [#23661](https://github.com/ClickHouse/ClickHouse/issues/23661). [#28221](https://github.com/ClickHouse/ClickHouse/pull/28221) ([Azat Khuzhin](https://github.com/azat)).
-* Fix `zookeeper_log.address` (before the first patch in this PR the address was always `::`) and reduce the number of `getpeername(2)` calls for this column (since `getpeername()` was called each time an entry for `zookeeper_log` was added; cache this address in the zookeeper client to avoid this). [#28212](https://github.com/ClickHouse/ClickHouse/pull/28212) ([Azat Khuzhin](https://github.com/azat)).
-* Support implicit conversions between the index in operator `[]` and the key of type `Map` (e.g. different `Int` types, `String` and `FixedString`). [#28096](https://github.com/ClickHouse/ClickHouse/pull/28096) ([Anton Popov](https://github.com/CurtizJ)).
-* Support the `ON CONFLICT` clause when inserting into the PostgreSQL table engine or table function. Closes [#27727](https://github.com/ClickHouse/ClickHouse/issues/27727). [#28081](https://github.com/ClickHouse/ClickHouse/pull/28081) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Lower restrictions for the `Enum` data type to allow attaching compatible data. Closes [#26672](https://github.com/ClickHouse/ClickHouse/issues/26672). [#28028](https://github.com/ClickHouse/ClickHouse/pull/28028) ([Dmitry Novik](https://github.com/novikd)).
-* Add a setting `empty_result_for_aggregation_by_constant_keys_on_empty_set` to control the behavior of grouping by constant keys on an empty set. This is to bring back the old behaviour of [#6842](https://github.com/ClickHouse/ClickHouse/issues/6842). [#27932](https://github.com/ClickHouse/ClickHouse/pull/27932) ([Amos Bird](https://github.com/amosbird)).
-* Added the `replication_wait_for_inactive_replica_timeout` setting. It allows specifying how long to wait for inactive replicas to execute an `ALTER`/`OPTIMIZE`/`TRUNCATE` query (default is 120 seconds). If `replication_alter_partitions_sync` is 2 and some replicas are not active for more than `replication_wait_for_inactive_replica_timeout` seconds, then `UNFINISHED` will be thrown. [#27931](https://github.com/ClickHouse/ClickHouse/pull/27931) ([tavplubix](https://github.com/tavplubix)).
-* Support a lambda argument for the `APPLY` column transformer, which allows applying functions with more than one argument (an illustrative sketch follows this list). This is for [#27877](https://github.com/ClickHouse/ClickHouse/issues/27877). [#27901](https://github.com/ClickHouse/ClickHouse/pull/27901) ([Amos Bird](https://github.com/amosbird)).
-* Enable `tcp_keep_alive_timeout` by default. [#27882](https://github.com/ClickHouse/ClickHouse/pull/27882) ([Azat Khuzhin](https://github.com/azat)).
-* Improve remote query cancellation (in case the remote server terminated abnormally). [#27881](https://github.com/ClickHouse/ClickHouse/pull/27881) ([Azat Khuzhin](https://github.com/azat)).
-* Use multipart copy upload for large S3 objects. [#27858](https://github.com/ClickHouse/ClickHouse/pull/27858) ([ianton-ru](https://github.com/ianton-ru)).
-* Allow symlink traversal for the library dictionary path. [#27815](https://github.com/ClickHouse/ClickHouse/pull/27815) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Now `ALTER MODIFY COLUMN` from `T` to `Nullable(T)` doesn't require a mutation. [#27787](https://github.com/ClickHouse/ClickHouse/pull/27787) ([victorgao](https://github.com/kafka1991)).
-* Don't silently ignore errors and don't count delays in `ReadBufferFromS3`. [#27484](https://github.com/ClickHouse/ClickHouse/pull/27484) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Improve `ALTER ... MATERIALIZE TTL` by recalculating metadata only, without the actual TTL action. [#27019](https://github.com/ClickHouse/ClickHouse/pull/27019) ([lthaooo](https://github.com/lthaooo)).
-* Allow reading the list of custom top level domains without a newline at EOF. [#28213](https://github.com/ClickHouse/ClickHouse/pull/28213) ([Azat Khuzhin](https://github.com/azat)).
-
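-A sketch of the `APPLY` lambda entry above (hypothetical table name):
-
-```sql
-SELECT * APPLY (x -> round(x, 2))  -- the lambda binds the extra argument of round
-FROM metrics;
-```
-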
[#27019](https://github.com/ClickHouse/ClickHouse/pull/27019) ([lthaooo](https://github.com/lthaooo)). -* Allow reading the list of custom top level domains without a new line at EOF. [#28213](https://github.com/ClickHouse/ClickHouse/pull/28213) ([Azat Khuzhin](https://github.com/azat)). - -#### Bug Fix - -* Fix cases, when reading compressed data from `carbon-clickhouse` fails with 'attempt to read after end of file'. Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([FArthur-cmd](https://github.com/FArthur-cmd)). -* Fix checking access grants when executing `GRANT WITH REPLACE` statement with `ON CLUSTER` clause. This PR improves fix [#27001](https://github.com/ClickHouse/ClickHouse/pull/27701). [#27983](https://github.com/ClickHouse/ClickHouse/pull/27983) ([Vitaly Baranov](https://github.com/vitlibar)). -* Allow selecting with `extremes = 1` from a column of the type `LowCardinality(UUID)`. [#27918](https://github.com/ClickHouse/ClickHouse/pull/27918) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)). -* After [#26864](https://github.com/ClickHouse/ClickHouse/pull/26864). Fix shutdown of `NamedSessionStorage`: session contexts stored in `NamedSessionStorage` are now destroyed before destroying the global context. [#27875](https://github.com/ClickHouse/ClickHouse/pull/27875) ([Vitaly Baranov](https://github.com/vitlibar)). -* Bugfix for `windowFunnel` "strict" mode. This fixes [#27469](https://github.com/ClickHouse/ClickHouse/issues/27469). [#27563](https://github.com/ClickHouse/ClickHouse/pull/27563) ([achimbab](https://github.com/achimbab)). -* Fix infinite loop while reading truncated `bzip2` archive. [#28543](https://github.com/ClickHouse/ClickHouse/pull/28543) ([Azat Khuzhin](https://github.com/azat)). -* Fix UUID overlap in `DROP TABLE` for internal DDL from `MaterializedMySQL`. MaterializedMySQL is an experimental feature. [#28533](https://github.com/ClickHouse/ClickHouse/pull/28533) ([Azat Khuzhin](https://github.com/azat)). -* Fix `There is no subcolumn` error, while select from tables, which have `Nested` columns and scalar columns with dot in name and the same prefix as `Nested` (e.g. `n.id UInt32, n.arr1 Array(UInt64), n.arr2 Array(UInt64)`). [#28531](https://github.com/ClickHouse/ClickHouse/pull/28531) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after ALTER of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). -* Fixed possible ZooKeeper watches leak (minor issue) on background processing of distributed DDL queue. Closes [#26036](https://github.com/ClickHouse/ClickHouse/issues/26036). [#28446](https://github.com/ClickHouse/ClickHouse/pull/28446) ([tavplubix](https://github.com/tavplubix)). -* Fix missing quoting of table names in `MaterializedPostgreSQL` engine. Closes [#28316](https://github.com/ClickHouse/ClickHouse/issues/28316). [#28433](https://github.com/ClickHouse/ClickHouse/pull/28433) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix the wrong behaviour of non joined rows from nullable column. 
Close [#27691](https://github.com/ClickHouse/ClickHouse/issues/27691). [#28349](https://github.com/ClickHouse/ClickHouse/pull/28349) ([vdimir](https://github.com/vdimir)). -* Fix NOT-IN index optimization when not all key columns are used. This fixes [#28120](https://github.com/ClickHouse/ClickHouse/issues/28120). [#28315](https://github.com/ClickHouse/ClickHouse/pull/28315) ([Amos Bird](https://github.com/amosbird)). -* Fix intersecting parts due to new part had been replaced with an empty part. [#28310](https://github.com/ClickHouse/ClickHouse/pull/28310) ([Azat Khuzhin](https://github.com/azat)). -* Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible read of uninitialized memory for queries with `Nullable(LowCardinality)` type and the setting `extremes` set to 1. Fixes [#28165](https://github.com/ClickHouse/ClickHouse/issues/28165). [#28205](https://github.com/ClickHouse/ClickHouse/pull/28205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Multiple small fixes for projections. See detailed description in the PR. [#28178](https://github.com/ClickHouse/ClickHouse/pull/28178) ([Amos Bird](https://github.com/amosbird)). -* Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). -* Fix handling null value with type of `Nullable(String)` in function `JSONExtract`. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930). This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . [#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)). -* Multiple fixes for the new `clickhouse-keeper` tool. Fix a rare bug in `clickhouse-keeper` when the client can receive a watch response before request-response. [#28197](https://github.com/ClickHouse/ClickHouse/pull/28197) ([alesapin](https://github.com/alesapin)). Fix incorrect behavior in `clickhouse-keeper` when list watches (`getChildren`) triggered with `set` requests for children. [#28190](https://github.com/ClickHouse/ClickHouse/pull/28190) ([alesapin](https://github.com/alesapin)). Fix rare case when changes of `clickhouse-keeper` settings may lead to lost logs and server hung. [#28360](https://github.com/ClickHouse/ClickHouse/pull/28360) ([alesapin](https://github.com/alesapin)). Fix bug in `clickhouse-keeper` which can lead to endless logs when `rotate_logs_interval` decreased. [#28152](https://github.com/ClickHouse/ClickHouse/pull/28152) ([alesapin](https://github.com/alesapin)). - -#### Build/Testing/Packaging Improvement - -* Enable Thread Fuzzer in Stress Test. Thread Fuzzer is ClickHouse feature that allows to test more permutations of thread scheduling and discover more potential issues. This closes [#9813](https://github.com/ClickHouse/ClickHouse/issues/9813). This closes [#9814](https://github.com/ClickHouse/ClickHouse/issues/9814). This closes [#9515](https://github.com/ClickHouse/ClickHouse/issues/9515). This closes [#9516](https://github.com/ClickHouse/ClickHouse/issues/9516). [#27538](https://github.com/ClickHouse/ClickHouse/pull/27538) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
-* Add a new log level `test` for testing environments. It is even more verbose than the default `trace`. [#28559](https://github.com/ClickHouse/ClickHouse/pull/28559) ([alesapin](https://github.com/alesapin)).
-* Print out git status information at the CMake configure stage. [#28047](https://github.com/ClickHouse/ClickHouse/pull/28047) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
-* Temporarily switched the Ubuntu apt repository to the mirror ru.archive.ubuntu.com, as the default one (archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)).
-
-
-### ClickHouse release v21.9, 2021-09-09
-
-#### Backward Incompatible Change
-
-* Do not output trailing zeros in the text representation of `Decimal` types. Example: `1.23` will be printed instead of `1.230000` for a decimal with scale 6. This closes [#15794](https://github.com/ClickHouse/ClickHouse/issues/15794). It may introduce slight incompatibility if your applications somehow relied on the trailing zeros. Serialization in output formats can be controlled with the setting `output_format_decimal_trailing_zeros`. The implementation of `toString` and casting to String is changed unconditionally. [#27680](https://github.com/ClickHouse/ClickHouse/pull/27680) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Do not allow applying a parametric aggregate function with the `-Merge` combinator to an aggregate function state if the state was produced by an aggregate function with different parameters. For example, the state of `fooState(42)(x)` cannot be finalized with `fooMerge(s)` or `fooMerge(123)(s)`; parameters must be specified explicitly, like `fooMerge(42)(s)`, and must be equal. This does not affect some special aggregate functions like `quantile` and `sequence*` that use parameters for finalization only. [#26847](https://github.com/ClickHouse/ClickHouse/pull/26847) ([tavplubix](https://github.com/tavplubix)).
-* Under clickhouse-local, always treat local addresses with a port as remote. [#26736](https://github.com/ClickHouse/ClickHouse/pull/26736) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix the issue that a bad cast could happen for some sophisticated queries with column aliases identical to the names of expressions. This fixes [#25447](https://github.com/ClickHouse/ClickHouse/issues/25447). This fixes [#26914](https://github.com/ClickHouse/ClickHouse/issues/26914). This fix may introduce backward incompatibility: if there are different expressions with identical names, an exception will be thrown. It may break some rare cases when `enable_optimize_predicate_expression` is set. [#26639](https://github.com/ClickHouse/ClickHouse/pull/26639) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Now a scalar subquery always returns a `Nullable` result if its type can be `Nullable`. This is needed because the result of an empty subquery should be `Null`. Previously, it was possible to get an error about incompatible types (type deduction does not execute the scalar subquery, and it could use a non-nullable type). A scalar subquery with an empty result that can't be converted to `Nullable` (like `Array` or `Tuple`) now throws an error. Fixes [#25411](https://github.com/ClickHouse/ClickHouse/issues/25411). [#26423](https://github.com/ClickHouse/ClickHouse/pull/26423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Introduce syntax for here documents. Example: `SELECT $doc$ VALUE $doc$` (see also the example below). [#26671](https://github.com/ClickHouse/ClickHouse/pull/26671) ([Maksim Kita](https://github.com/kitaisreal)). This change is backward incompatible if queries contain identifiers with `$` [#28768](https://github.com/ClickHouse/ClickHouse/issues/28768).
-* Now indices can handle Nullable types, including `isNull` and `isNotNull`. [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) and [#12455](https://github.com/ClickHouse/ClickHouse/pull/12455) ([Amos Bird](https://github.com/amosbird)) and [#27250](https://github.com/ClickHouse/ClickHouse/pull/27250) ([Azat Khuzhin](https://github.com/azat)). But this was done with on-disk format changes, and even though the new server can read old data, the old server cannot. Also, if you have `MINMAX` data skipping indices, you may get a `Data after mutation/merge is not byte-identical` error, since the new index will have the `.idx2` extension while before it was `.idx`. That said, you should not delay updating all existing replicas in this case; otherwise, if an old replica (<21.9) downloads data from a new replica (21.9+), it will not be able to apply the index for the downloaded part.
-
-#### New Feature
-
-* Implementation of short circuit function evaluation, closes [#12587](https://github.com/ClickHouse/ClickHouse/issues/12587). Added setting `short_circuit_function_evaluation` to configure it. [#23367](https://github.com/ClickHouse/ClickHouse/pull/23367) ([Kruglov Pavel](https://github.com/Avogar)).
-* Add support for INTERSECT, EXCEPT, ANY, ALL operators. [#24757](https://github.com/ClickHouse/ClickHouse/pull/24757) ([Kirill Ershov](https://github.com/zdikov)). ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add support for encryption at the virtual file system level (data encryption at rest) using the AES-CTR algorithm. [#24206](https://github.com/ClickHouse/ClickHouse/pull/24206) ([Latysheva Alexandra](https://github.com/alexelex)). ([Vitaly Baranov](https://github.com/vitlibar)) [#26733](https://github.com/ClickHouse/ClickHouse/pull/26733) [#26377](https://github.com/ClickHouse/ClickHouse/pull/26377) [#26465](https://github.com/ClickHouse/ClickHouse/pull/26465).
-* Added natural language processing (NLP) functions for tokenization, stemming, lemmatizing, and searching in synonym extensions. [#24997](https://github.com/ClickHouse/ClickHouse/pull/24997) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Added integration with the S2 geometry library. [#24980](https://github.com/ClickHouse/ClickHouse/pull/24980) ([Andr0901](https://github.com/Andr0901)). ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Add SQLite table engine, table function, and database engine. [#24194](https://github.com/ClickHouse/ClickHouse/pull/24194) ([Arslan Gumerov](https://github.com/g-arslan)). ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Added support for custom queries for the `MySQL`, `PostgreSQL`, `ClickHouse`, `JDBC`, and `Cassandra` dictionary sources. Closes [#1270](https://github.com/ClickHouse/ClickHouse/issues/1270). [#26995](https://github.com/ClickHouse/ClickHouse/pull/26995) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add shared (replicated) storage of users, roles, row policies, quotas and settings profiles through ZooKeeper. [#27426](https://github.com/ClickHouse/ClickHouse/pull/27426) ([Kevin Michel](https://github.com/kmichel-aiven)).
-* Add compression for `INTO OUTFILE` that automatically chooses the compression algorithm. Closes [#3473](https://github.com/ClickHouse/ClickHouse/issues/3473). [#27134](https://github.com/ClickHouse/ClickHouse/pull/27134) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
-* Add `INSERT ... FROM INFILE` similarly to `SELECT ... INTO OUTFILE`. [#27655](https://github.com/ClickHouse/ClickHouse/pull/27655) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
-* Added `complex_key_range_hashed` dictionary. Closes [#22029](https://github.com/ClickHouse/ClickHouse/issues/22029). [#27629](https://github.com/ClickHouse/ClickHouse/pull/27629) ([Maksim Kita](https://github.com/kitaisreal)).
-* Support expressions in the JOIN ON section. Closes [#21868](https://github.com/ClickHouse/ClickHouse/issues/21868). [#24420](https://github.com/ClickHouse/ClickHouse/pull/24420) ([Vladimir C](https://github.com/vdimir)).
-* When the client connects to the server, it receives information about all warnings that have already been collected by the server (this can be disabled with the option `--no-warnings`). Added the `system.warnings` table to collect warnings about server configuration. [#26246](https://github.com/ClickHouse/ClickHouse/pull/26246) ([Filatenkov Artur](https://github.com/FArthur-cmd)). [#26282](https://github.com/ClickHouse/ClickHouse/pull/26282) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
-* Allow using constant expressions from `WITH` and `SELECT` in aggregate function parameters. Closes [#10945](https://github.com/ClickHouse/ClickHouse/issues/10945). [#27531](https://github.com/ClickHouse/ClickHouse/pull/27531) ([abel-cheng](https://github.com/abel-cheng)).
-* Add `tupleToNameValuePairs`, a function that turns a named tuple into an array of pairs (see the example below). [#27505](https://github.com/ClickHouse/ClickHouse/pull/27505) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
-* Add support for the `bzip2` compression method for import/export. Closes [#22428](https://github.com/ClickHouse/ClickHouse/issues/22428). [#27377](https://github.com/ClickHouse/ClickHouse/pull/27377) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Added `bitmapSubsetOffsetLimit(bitmap, offset, cardinality_limit)` function. It creates a subset of the bitmap, limiting the results to `cardinality_limit`, starting from `offset`. [#27234](https://github.com/ClickHouse/ClickHouse/pull/27234) ([DHBin](https://github.com/DHBin)).
-* Add column `default_database` to `system.users`. [#27054](https://github.com/ClickHouse/ClickHouse/pull/27054) ([kevin wan](https://github.com/MaxWk)).
-* Supported `cluster` macros inside the table functions `cluster` and `clusterAllReplicas`. [#26913](https://github.com/ClickHouse/ClickHouse/pull/26913) ([polyprogrammist](https://github.com/PolyProgrammist)).
-* Add new functions `currentRoles()`, `enabledRoles()`, `defaultRoles()`. [#26780](https://github.com/ClickHouse/ClickHouse/pull/26780) ([Vitaly Baranov](https://github.com/vitlibar)).
-* New functions `currentProfiles()`, `enabledProfiles()`, `defaultProfiles()`. [#26714](https://github.com/ClickHouse/ClickHouse/pull/26714) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Add functions that return the `(initial_)query_id` of the current query. This closes [#23682](https://github.com/ClickHouse/ClickHouse/issues/23682). [#26410](https://github.com/ClickHouse/ClickHouse/pull/26410) ([Alexey Boykov](https://github.com/mathalex)).
-* Add `REPLACE GRANT` feature. [#26384](https://github.com/ClickHouse/ClickHouse/pull/26384) ([Caspian](https://github.com/Cas-pian)).
-* The `EXPLAIN` query now has an `EXPLAIN ESTIMATE ...` mode that shows information about the rows, marks and parts to be read from MergeTree tables. Closes [#23941](https://github.com/ClickHouse/ClickHouse/issues/23941). [#26131](https://github.com/ClickHouse/ClickHouse/pull/26131) ([fastio](https://github.com/fastio)).
-* Added `system.zookeeper_log` table. All actions of the ZooKeeper client are logged into this table. Implements [#25449](https://github.com/ClickHouse/ClickHouse/issues/25449). [#26129](https://github.com/ClickHouse/ClickHouse/pull/26129) ([tavplubix](https://github.com/tavplubix)).
-* Zero-copy replication for `ReplicatedMergeTree` over `HDFS` storage. [#25918](https://github.com/ClickHouse/ClickHouse/pull/25918) ([Zhichang Yu](https://github.com/yuzhichang)).
-* Allow inserting the `Nested` type as an array of structs in the `Arrow`, `ORC` and `Parquet` input formats. [#25902](https://github.com/ClickHouse/ClickHouse/pull/25902) ([Kruglov Pavel](https://github.com/Avogar)).
-* Add a new data type `Date32` (stored as Int32). It supports the same date range as `DateTime64`, and Parquet `date32` values can be loaded into ClickHouse `Date32`. Also added a new function `toDate32`, similar to `toDate`. [#25774](https://github.com/ClickHouse/ClickHouse/pull/25774) ([LiuNeng](https://github.com/liuneng1994)).
-* Allow setting a default database for users. [#25268](https://github.com/ClickHouse/ClickHouse/issues/25268). [#25687](https://github.com/ClickHouse/ClickHouse/pull/25687) ([kevin wan](https://github.com/MaxWk)).
-* Add an optional parameter to the `MongoDB` engine to accept connection string options and support SSL connections. Closes [#21189](https://github.com/ClickHouse/ClickHouse/issues/21189). Closes [#21041](https://github.com/ClickHouse/ClickHouse/issues/21041). [#22045](https://github.com/ClickHouse/ClickHouse/pull/22045) ([Omar Bazaraa](https://github.com/OmarBazaraa)).
-
-#### Experimental Feature
-
-* Added a compression codec `AES_128_GCM_SIV` which encrypts columns instead of compressing them. [#19896](https://github.com/ClickHouse/ClickHouse/pull/19896) ([PHO](https://github.com/depressed-pho)). Will be rewritten, do not use.
-* Rename `MaterializeMySQL` to `MaterializedMySQL`. [#26822](https://github.com/ClickHouse/ClickHouse/pull/26822) ([tavplubix](https://github.com/tavplubix)).
-
-#### Performance Improvement
-
-* Improve the performance of fast queries when `max_execution_time = 0` by reducing the number of `clock_gettime` system calls. [#27325](https://github.com/ClickHouse/ClickHouse/pull/27325) ([filimonov](https://github.com/filimonov)).
-* Specialize date-time-related comparisons to achieve better performance. This fixes [#27083](https://github.com/ClickHouse/ClickHouse/issues/27083). [#27122](https://github.com/ClickHouse/ClickHouse/pull/27122) ([Amos Bird](https://github.com/amosbird)).
-* Share file descriptors in concurrent reads of the same files. There is no noticeable performance difference on Linux, but the number of opened files will be significantly (10..100 times) lower on typical servers, and it makes operations easier. See [#26214](https://github.com/ClickHouse/ClickHouse/issues/26214). [#26768](https://github.com/ClickHouse/ClickHouse/pull/26768) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improve latency of short queries that require reading from tables with a large number of columns. [#26371](https://github.com/ClickHouse/ClickHouse/pull/26371) ([Anton Popov](https://github.com/CurtizJ)).
-* Don't build sets for indices when analyzing a query. [#26365](https://github.com/ClickHouse/ClickHouse/pull/26365) ([Raúl Marín](https://github.com/Algunenano)).
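Two of the v21.9 features above can be demonstrated with self-contained queries; a minimal sketch (the output in the comment is the expected result, assuming the behavior described in the entries):

```sql
-- Here document (dollar-quoted string): quotes inside need no escaping.
SELECT $doc$It's a 'quoted' value$doc$ AS s;

-- tupleToNameValuePairs: turn a named tuple into an array of (name, value) pairs.
SELECT tupleToNameValuePairs(CAST((3, 7), 'Tuple(a UInt8, b UInt8)')) AS pairs;
-- Expected: [('a', 3), ('b', 7)]
```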
-* Vectorize the SUM of Nullable integer types with native representation ([David Manzanares](https://github.com/davidmanzanares), [Raúl Marín](https://github.com/Algunenano)). [#26248](https://github.com/ClickHouse/ClickHouse/pull/26248) ([Raúl Marín](https://github.com/Algunenano)).
-* Compile expressions involving columns with `Enum` types. [#26237](https://github.com/ClickHouse/ClickHouse/pull/26237) ([Maksim Kita](https://github.com/kitaisreal)).
-* Compile aggregate functions `groupBitOr`, `groupBitAnd`, `groupBitXor`. [#26161](https://github.com/ClickHouse/ClickHouse/pull/26161) ([Maksim Kita](https://github.com/kitaisreal)).
-* Improved memory usage with better block size prediction when reading empty DEFAULT columns. Closes [#17317](https://github.com/ClickHouse/ClickHouse/issues/17317). [#25917](https://github.com/ClickHouse/ClickHouse/pull/25917) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Reduce memory usage and the number of read rows in queries with `ORDER BY primary_key`. [#25721](https://github.com/ClickHouse/ClickHouse/pull/25721) ([Anton Popov](https://github.com/CurtizJ)).
-* Enable `distributed_push_down_limit` by default. [#27104](https://github.com/ClickHouse/ClickHouse/pull/27104) ([Azat Khuzhin](https://github.com/azat)).
-* Make `toTimeZone` monotonic when the time zone argument is a constant value, to support partition pruning in queries that filter on the converted value. [#26261](https://github.com/ClickHouse/ClickHouse/pull/26261) ([huangzhaowei](https://github.com/SaintBacchus)).
-
-#### Improvement
-
-* Mark window functions as ready for general use. Remove the `allow_experimental_window_functions` setting. [#27184](https://github.com/ClickHouse/ClickHouse/pull/27184) ([Alexander Kuzmenkov](https://github.com/akuzm)).
-* Improve compatibility with non-whole-minute timezone offsets. [#27080](https://github.com/ClickHouse/ClickHouse/pull/27080) ([Raúl Marín](https://github.com/Algunenano)).
-* If the file descriptor in a `File` table is a regular file, allow reading from it multiple times. This allows `clickhouse-local` to read from stdin multiple times (with multiple SELECT queries or subqueries) if stdin is a regular file, as in `clickhouse-local --query "SELECT * FROM table UNION ALL SELECT * FROM table" ... < file`. This closes [#11124](https://github.com/ClickHouse/ClickHouse/issues/11124). Co-authored with ([alexey-milovidov](https://github.com/alexey-milovidov)). [#25960](https://github.com/ClickHouse/ClickHouse/pull/25960) ([BoloniniD](https://github.com/BoloniniD)).
-* Remove duplicate index analysis and avoid possible invalid limit checks during projection analysis. [#27742](https://github.com/ClickHouse/ClickHouse/pull/27742) ([Amos Bird](https://github.com/amosbird)).
-* Enable query parameters to be passed in the body of HTTP requests. [#27706](https://github.com/ClickHouse/ClickHouse/pull/27706) ([Hermano Lustosa](https://github.com/hllustosa)).
-* Disallow `arrayJoin` on partition expressions. [#27648](https://github.com/ClickHouse/ClickHouse/pull/27648) ([Raúl Marín](https://github.com/Algunenano)).
-* Log the client IP address if authentication fails. [#27514](https://github.com/ClickHouse/ClickHouse/pull/27514) ([Misko Lee](https://github.com/imiskolee)).
-* Use bytes instead of strings for binary data in the GRPC protocol. [#27431](https://github.com/ClickHouse/ClickHouse/pull/27431) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Send a response with an error message if the HTTP port is not set and a user tries to send an HTTP request to the TCP port. [#27385](https://github.com/ClickHouse/ClickHouse/pull/27385) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
-* Add `_CAST` function for internal usage; it does not preserve type nullability, while the non-internal `CAST` preserves it according to the setting `cast_keep_nullable` (see the example below). Closes [#12636](https://github.com/ClickHouse/ClickHouse/issues/12636). [#27382](https://github.com/ClickHouse/ClickHouse/pull/27382) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add setting `log_formatted_queries` to log the additionally formatted query into `system.query_log`. It's useful for normalized query analysis because functions like `normalizeQuery` and `normalizeQueryKeepNames` don't parse/format queries in order to achieve better performance. [#27380](https://github.com/ClickHouse/ClickHouse/pull/27380) ([Amos Bird](https://github.com/amosbird)).
-* Add two settings `max_hyperscan_regexp_length` and `max_hyperscan_regexp_total_length` to prevent huge regexps from being used in hyperscan-related functions, such as `multiMatchAny`. [#27378](https://github.com/ClickHouse/ClickHouse/pull/27378) ([Amos Bird](https://github.com/amosbird)).
-* Memory consumed by bitmap aggregate functions is now taken into account for memory limits. This closes [#26555](https://github.com/ClickHouse/ClickHouse/issues/26555). [#27252](https://github.com/ClickHouse/ClickHouse/pull/27252) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add a 10-second cache for the S3 proxy resolver. [#27216](https://github.com/ClickHouse/ClickHouse/pull/27216) ([ianton-ru](https://github.com/ianton-ru)).
-* Use an individual mutex per regexp construction instead of a global one. This helps avoid huge regexp constructions blocking other related threads. [#27211](https://github.com/ClickHouse/ClickHouse/pull/27211) ([Amos Bird](https://github.com/amosbird)).
-* Support schemas for the PostgreSQL database engine. Closes [#27166](https://github.com/ClickHouse/ClickHouse/issues/27166). [#27198](https://github.com/ClickHouse/ClickHouse/pull/27198) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Track memory usage in clickhouse-client. [#27191](https://github.com/ClickHouse/ClickHouse/pull/27191) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
-* Try recording `query_kind` in `system.query_log` even when the query fails to start. [#27182](https://github.com/ClickHouse/ClickHouse/pull/27182) ([Amos Bird](https://github.com/amosbird)).
-* Added column `replica_is_active`, which maps replica names to their active status, to table `system.replicas`. Closes [#27138](https://github.com/ClickHouse/ClickHouse/issues/27138). [#27180](https://github.com/ClickHouse/ClickHouse/pull/27180) ([Maksim Kita](https://github.com/kitaisreal)).
-* Allow passing query settings via the server URI in the Web UI. [#27177](https://github.com/ClickHouse/ClickHouse/pull/27177) ([kolsys](https://github.com/kolsys)).
-* Add a new metric called `MaxPushedDDLEntryID`, which is the maximum DDL entry id that the current node has pushed to ZooKeeper. [#27174](https://github.com/ClickHouse/ClickHouse/pull/27174) ([Fuwang Hu](https://github.com/fuwhu)).
-* Improved the checks for node existence and for empty-string nodes when `clickhouse-keeper` creates znodes. [#27125](https://github.com/ClickHouse/ClickHouse/pull/27125) ([小路](https://github.com/nicelulu)).
-* Merge JOIN now correctly handles an empty set on the right side. [#27078](https://github.com/ClickHouse/ClickHouse/pull/27078) ([Vladimir C](https://github.com/vdimir)).
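The `CAST`/`_CAST` distinction above can be observed directly from a client session; a small sketch assuming the `cast_keep_nullable` semantics described in the entry:

```sql
-- With cast_keep_nullable = 1, an ordinary CAST keeps the Nullable property
-- of its argument; the internal _CAST used by the server does not.
SET cast_keep_nullable = 1;
SELECT toTypeName(CAST(toNullable(1) AS Int32)) AS t;  -- Nullable(Int32)
```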
-* Now functions can be shard-level constants, which means that if a function is executed in the context of some distributed table, it generates a normal column; otherwise it produces a constant value. Notable functions are: `hostName()`, `tcpPort()`, `version()`, `buildId()`, `uptime()`, etc. [#27020](https://github.com/ClickHouse/ClickHouse/pull/27020) ([Amos Bird](https://github.com/amosbird)).
-* Updated `extractAllGroupsHorizontal`: an upper limit on the number of matches per row can be set via an optional third argument. [#26961](https://github.com/ClickHouse/ClickHouse/pull/26961) ([Vasily Nemkov](https://github.com/Enmk)).
-* Expose `RocksDB` statistics via the system.rocksdb table. Read rocksdb options from the ClickHouse config (`rocksdb...` keys). NOTE: ClickHouse does not rely on RocksDB, it is just one of the additional integration storage engines. [#26821](https://github.com/ClickHouse/ClickHouse/pull/26821) ([Azat Khuzhin](https://github.com/azat)).
-* Less verbose internal RocksDB logs. NOTE: ClickHouse does not rely on RocksDB, it is just one of the additional integration storage engines. This closes [#26252](https://github.com/ClickHouse/ClickHouse/issues/26252). [#26789](https://github.com/ClickHouse/ClickHouse/pull/26789) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Changing default roles affects new sessions only. [#26759](https://github.com/ClickHouse/ClickHouse/pull/26759) ([Vitaly Baranov](https://github.com/vitlibar)).
-* The watchdog is disabled in Docker by default. This fixes Ctrl+C not being handled. [#26757](https://github.com/ClickHouse/ClickHouse/pull/26757) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
-* `SET PROFILE` now applies constraints too, if they're set for the passed profile. [#26730](https://github.com/ClickHouse/ClickHouse/pull/26730) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Improve handling of `KILL QUERY` requests. [#26675](https://github.com/ClickHouse/ClickHouse/pull/26675) ([Raúl Marín](https://github.com/Algunenano)).
-* The `mapPopulateSeries` function supports the `Map` type. [#26663](https://github.com/ClickHouse/ClickHouse/pull/26663) ([Ildus Kurbangaliev](https://github.com/ildus)).
-* Fix excessive (x2) connect attempts with `skip_unavailable_shards`. [#26658](https://github.com/ClickHouse/ClickHouse/pull/26658) ([Azat Khuzhin](https://github.com/azat)).
-* Avoid hanging `clickhouse-benchmark` if the connection fails (e.g. on EMFILE). [#26656](https://github.com/ClickHouse/ClickHouse/pull/26656) ([Azat Khuzhin](https://github.com/azat)).
-* Allow more threads to be used by the Kafka engine. [#26642](https://github.com/ClickHouse/ClickHouse/pull/26642) ([feihengye](https://github.com/feihengye)).
-* Add round-robin support for `clickhouse-benchmark` (it does not differ from the regular multi host/port run except for the statistics report). [#26607](https://github.com/ClickHouse/ClickHouse/pull/26607) ([Azat Khuzhin](https://github.com/azat)).
-* Executable dictionaries (`executable`, `executable_pool`) enable creation with a DDL query using `clickhouse-local`. Closes [#22355](https://github.com/ClickHouse/ClickHouse/issues/22355). [#26510](https://github.com/ClickHouse/ClickHouse/pull/26510) ([Maksim Kita](https://github.com/kitaisreal)).
-* Set the client query kind for the `mysql` and `postgresql` compatibility protocol handlers. [#26498](https://github.com/ClickHouse/ClickHouse/pull/26498) ([anneji-dev](https://github.com/anneji-dev)).
-* Apply `LIMIT` on the shards for queries like `SELECT * FROM dist ORDER BY key LIMIT 10` with `distributed_push_down_limit=1`. Avoid running `Distinct`/`LIMIT BY` steps for queries like `SELECT DISTINCT sharding_key FROM dist ORDER BY key`. Now `distributed_push_down_limit` is respected by the `optimize_distributed_group_by_sharding_key` optimization. [#26466](https://github.com/ClickHouse/ClickHouse/pull/26466) ([Azat Khuzhin](https://github.com/azat)).
-* Updated protobuf to 3.17.3. Changelogs are available at https://github.com/protocolbuffers/protobuf/releases. [#26424](https://github.com/ClickHouse/ClickHouse/pull/26424) ([Ilya Yatsishin](https://github.com/qoega)).
-* Enable the `use_hedged_requests` setting, which allows mitigating tail latencies on large clusters. [#26380](https://github.com/ClickHouse/ClickHouse/pull/26380) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improve behaviour when a non-existing host is in the user's allowed host list. [#26368](https://github.com/ClickHouse/ClickHouse/pull/26368) ([ianton-ru](https://github.com/ianton-ru)).
-* Add the ability to set `Distributed` directory monitor settings via CREATE TABLE (e.g. `CREATE TABLE dist (key Int) Engine=Distributed(cluster, db, table) SETTINGS monitor_batch_inserts=1` and similar; see the example below). [#26336](https://github.com/ClickHouse/ClickHouse/pull/26336) ([Azat Khuzhin](https://github.com/azat)).
-* Save the server address in history URLs in the web UI if it differs from the origin of the web UI. This closes [#26044](https://github.com/ClickHouse/ClickHouse/issues/26044). [#26322](https://github.com/ClickHouse/ClickHouse/pull/26322) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add events to profile calls to `sleep` / `sleepEachRow`. [#26320](https://github.com/ClickHouse/ClickHouse/pull/26320) ([Raúl Marín](https://github.com/Algunenano)).
-* Allow reusing connections of shards among different clusters. This also avoids creating new connections when using the `cluster` table function. [#26318](https://github.com/ClickHouse/ClickHouse/pull/26318) ([Amos Bird](https://github.com/amosbird)).
-* Control the execution period of clearing old temporary directories with a parameter that has a default value. [#26212](https://github.com/ClickHouse/ClickHouse/issues/26212). [#26313](https://github.com/ClickHouse/ClickHouse/pull/26313) ([fastio](https://github.com/fastio)).
-* Add a setting `function_range_max_elements_in_block` to tune the safety threshold for the data volume generated by the function `range`. This closes [#26303](https://github.com/ClickHouse/ClickHouse/issues/26303). [#26305](https://github.com/ClickHouse/ClickHouse/pull/26305) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Check the hash function at table creation, not at sampling time. A MergeTree setting is added so that if someone has created a table with an incorrect sampling column but sampling is never used, the check can be disabled to start the server without an exception. [#26256](https://github.com/ClickHouse/ClickHouse/pull/26256) ([zhaoyu](https://github.com/zxc111)).
-* Added the `output_format_avro_string_column_pattern` setting to write specified String columns to Avro as `string` instead of the default `bytes`. Implements [#22414](https://github.com/ClickHouse/ClickHouse/issues/22414). [#26245](https://github.com/ClickHouse/ClickHouse/pull/26245) ([Ilya Golshtein](https://github.com/ilejn)).
-* Add information about column sizes to the `system.columns` table for `Log` and `TinyLog` tables. This closes [#9001](https://github.com/ClickHouse/ClickHouse/issues/9001). [#26241](https://github.com/ClickHouse/ClickHouse/pull/26241) ([Nikolay Degterinsky](https://github.com/evillique)).
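Putting two of the `Distributed`-related items above together; the cluster, database, and table names below are hypothetical:

```sql
-- Directory monitor settings declared directly in CREATE TABLE (see above).
CREATE TABLE dist (key Int32)
ENGINE = Distributed(my_cluster, my_db, local_table)
SETTINGS monitor_batch_inserts = 1;

-- With distributed_push_down_limit = 1, the LIMIT is applied on each shard
-- before the results are merged, reducing the amount of transferred data.
SELECT * FROM dist ORDER BY key LIMIT 10
SETTINGS distributed_push_down_limit = 1;
```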
-* Don't throw an exception when querying the `system.detached_parts` table if there is a custom disk configuration and the `detached` directory does not exist on some disks. This closes [#26078](https://github.com/ClickHouse/ClickHouse/issues/26078). [#26236](https://github.com/ClickHouse/ClickHouse/pull/26236) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Check for non-deterministic functions in keys, including constant expressions like `now()`, `today()`. This closes [#25875](https://github.com/ClickHouse/ClickHouse/issues/25875). This closes [#11333](https://github.com/ClickHouse/ClickHouse/issues/11333). [#26235](https://github.com/ClickHouse/ClickHouse/pull/26235) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Convert `timestamp` and `timestamptz` data types to `DateTime64` in the PostgreSQL table engine. [#26234](https://github.com/ClickHouse/ClickHouse/pull/26234) ([jasine](https://github.com/jasine)).
-* Apply aggressive IN index analysis for projections so that a better projection candidate can be selected. [#26218](https://github.com/ClickHouse/ClickHouse/pull/26218) ([Amos Bird](https://github.com/amosbird)).
-* Remove the GLOBAL keyword for IN when a scalar function is passed. In previous versions, if a user specified `GLOBAL IN f(x)`, an exception was thrown. [#26217](https://github.com/ClickHouse/ClickHouse/pull/26217) ([Amos Bird](https://github.com/amosbird)).
-* Add an error id (like `BAD_ARGUMENTS`) to exception messages. This closes [#25862](https://github.com/ClickHouse/ClickHouse/issues/25862). [#26172](https://github.com/ClickHouse/ClickHouse/pull/26172) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix incorrect output with the `--progress` option for clickhouse-local. The progress bar will be cleared once it gets to 100%, the same as for clickhouse-client. Closes [#17484](https://github.com/ClickHouse/ClickHouse/issues/17484). [#26128](https://github.com/ClickHouse/ClickHouse/pull/26128) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add `merge_selecting_sleep_ms` setting. [#26120](https://github.com/ClickHouse/ClickHouse/pull/26120) ([lthaooo](https://github.com/lthaooo)).
-* Remove the complicated usage of Linux AIO with one-block readahead and replace it with plain simple synchronous IO with O_DIRECT. In previous versions, the setting `min_bytes_to_use_direct_io` might not work correctly if `max_threads` was greater than one; reading with direct IO (which is disabled by default for queries and enabled by default for large merges) worked in a less efficient way. This closes [#25997](https://github.com/ClickHouse/ClickHouse/issues/25997). [#26003](https://github.com/ClickHouse/ClickHouse/pull/26003) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Flush the `Distributed` table on a `REPLACE TABLE` query. Resolves [#24566](https://github.com/ClickHouse/ClickHouse/issues/24566). Do not replace (or create) a table on a `[CREATE OR] REPLACE TABLE ... AS SELECT` query if insertion into the new table fails. Resolves [#23175](https://github.com/ClickHouse/ClickHouse/issues/23175). [#25895](https://github.com/ClickHouse/ClickHouse/pull/25895) ([tavplubix](https://github.com/tavplubix)).
-* Add a `views` column to system.query_log containing the names of the (materialized or live) views executed by the query. Adds a new log table (`system.query_views_log`) that contains information about each view executed during a query. Modifies view execution: when an exception is thrown while executing a view, any view that has already started will continue running until it finishes. This used to be the behaviour under `parallel_view_processing=true`, and now it is always the behaviour. Dependent views now report reading progress to the context. [#25714](https://github.com/ClickHouse/ClickHouse/pull/25714) ([Raúl Marín](https://github.com/Algunenano)).
-* Do connection draining asynchronously upon finishing executing distributed queries. A new server setting `max_threads_for_connection_collector` specifies the number of workers that recycle connections in the background. If the pool is full, the connection is drained synchronously, but a bit differently than before: it is drained after we send EOS to the client, the query succeeds immediately after receiving enough data, and any exception is logged instead of being thrown to the client. Added setting `drain_timeout` (3 seconds by default). Connection draining will disconnect upon timeout. [#25674](https://github.com/ClickHouse/ClickHouse/pull/25674) ([Amos Bird](https://github.com/amosbird)).
-* Support for multiple includes in configuration. It is possible to include users configuration and remote servers configuration from multiple sources. Simply place an `<include>` element with a `from_zk`, `from_env` or `incl` attribute, and it will be replaced with the substitution. [#24404](https://github.com/ClickHouse/ClickHouse/pull/24404) ([nvartolomei](https://github.com/nvartolomei)).
-* Fix multiple-block insertion into a distributed table with `insert_distributed_one_random_shard = 1`. This is a marginal feature. Mark as improvement. [#23140](https://github.com/ClickHouse/ClickHouse/pull/23140) ([Amos Bird](https://github.com/amosbird)).
-* Support `LowCardinality` and `FixedString` keys/values for the `Map` type. [#21543](https://github.com/ClickHouse/ClickHouse/pull/21543) ([hexiaoting](https://github.com/hexiaoting)).
-* Enable reloading of the local disk config. [#19526](https://github.com/ClickHouse/ClickHouse/pull/19526) ([taiyang-li](https://github.com/taiyang-li)).
-
-#### Bug Fix
-
-* Fix a couple of bugs that may cause replicas to diverge. [#27808](https://github.com/ClickHouse/ClickHouse/pull/27808) ([tavplubix](https://github.com/tavplubix)).
-* Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)).
-* Prevent crashes for some formats when a NULL (tombstone) message comes from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)).
-* Fix column filtering with `UNION DISTINCT` in a subquery. Closes [#27578](https://github.com/ClickHouse/ClickHouse/issues/27578). [#27689](https://github.com/ClickHouse/ClickHouse/pull/27689) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix a bad type cast when functions like `arrayHas` are applied to arrays of LowCardinality of Nullable of different non-numeric types like `DateTime` and `DateTime64`. In previous versions a bad cast occurred; in the new version it leads to an exception. This closes [#26330](https://github.com/ClickHouse/ClickHouse/issues/26330). [#27682](https://github.com/ClickHouse/ClickHouse/pull/27682) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix the postgresql table function resulting in non-closing connections. Closes [#26088](https://github.com/ClickHouse/ClickHouse/issues/26088). [#27662](https://github.com/ClickHouse/ClickHouse/pull/27662) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fixed another case of the `Unexpected merged part ... intersecting drop range ...` error. [#27656](https://github.com/ClickHouse/ClickHouse/pull/27656) ([tavplubix](https://github.com/tavplubix)).
-* Fix an error with an aliased column in a `Distributed` table. [#27652](https://github.com/ClickHouse/ClickHouse/pull/27652) ([Vladimir C](https://github.com/vdimir)).
-* After setting `max_memory_usage*` to a non-zero value, it was not possible to reset it back to 0 (unlimited). This is fixed. [#27638](https://github.com/ClickHouse/ClickHouse/pull/27638) ([tavplubix](https://github.com/tavplubix)).
-* Fixed underflow of the time value when constructing it from components. Closes [#27193](https://github.com/ClickHouse/ClickHouse/issues/27193). [#27605](https://github.com/ClickHouse/ClickHouse/pull/27605) ([Vasily Nemkov](https://github.com/Enmk)).
-* Fix a crash during projection materialization when some parts contain missing columns. This fixes [#27512](https://github.com/ClickHouse/ClickHouse/issues/27512). [#27528](https://github.com/ClickHouse/ClickHouse/pull/27528) ([Amos Bird](https://github.com/amosbird)).
-* Fix the metric `BackgroundMessageBrokerSchedulePoolTask`, which may have been mistyped. [#27452](https://github.com/ClickHouse/ClickHouse/pull/27452) ([Ben](https://github.com/benbiti)).
-* Fix distributed queries with zero shards and aggregation. [#27427](https://github.com/ClickHouse/ClickHouse/pull/27427) ([Azat Khuzhin](https://github.com/azat)).
-* Compatibility with systems where `/proc/meminfo` does not contain the KB suffix. [#27361](https://github.com/ClickHouse/ClickHouse/pull/27361) ([Mike Kot](https://github.com/myrrc)).
-* Fix an incorrect result for queries with row-level security, `PREWHERE` and a LowCardinality filter. Fixes [#27179](https://github.com/ClickHouse/ClickHouse/issues/27179). [#27329](https://github.com/ClickHouse/ClickHouse/pull/27329) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fixed incorrect validation of the partition id for MergeTree tables created with the old syntax. [#27328](https://github.com/ClickHouse/ClickHouse/pull/27328) ([tavplubix](https://github.com/tavplubix)).
-* Fix the MySQL protocol when using parallel formats (CSV / TSV). [#27326](https://github.com/ClickHouse/ClickHouse/pull/27326) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix the `Cannot find column` error for queries with sampling. Was introduced in [#24574](https://github.com/ClickHouse/ClickHouse/issues/24574). Fixes [#26522](https://github.com/ClickHouse/ClickHouse/issues/26522). [#27301](https://github.com/ClickHouse/ClickHouse/pull/27301) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix errors like `Expected ColumnLowCardinality, gotUInt8` or `Bad cast from type DB::ColumnVector to DB::ColumnLowCardinality` for some queries with `LowCardinality` in `PREWHERE`. And more importantly, fix the lack of whitespace in the error message. Fixes [#23515](https://github.com/ClickHouse/ClickHouse/issues/23515). [#27298](https://github.com/ClickHouse/ClickHouse/pull/27298) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix `distributed_group_by_no_merge = 2` with `distributed_push_down_limit = 1` or `optimize_distributed_group_by_sharding_key = 1` with `LIMIT BY` and `LIMIT OFFSET`. These are obscure combinations of settings that no one is using. [#27249](https://github.com/ClickHouse/ClickHouse/pull/27249) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a mutation getting stuck on invalid partitions in non-replicated MergeTree. [#27248](https://github.com/ClickHouse/ClickHouse/pull/27248) ([Azat Khuzhin](https://github.com/azat)).
-* In case of ambiguity, lambda functions prefer their arguments to other aliases or identifiers. [#27235](https://github.com/ClickHouse/ClickHouse/pull/27235) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix the column structure in merge join, closes [#27091](https://github.com/ClickHouse/ClickHouse/issues/27091). [#27217](https://github.com/ClickHouse/ClickHouse/pull/27217) ([Vladimir C](https://github.com/vdimir)).
-* In rare cases the `system.detached_parts` table might contain incorrect information for some parts; it's fixed. Fixes [#27114](https://github.com/ClickHouse/ClickHouse/issues/27114). [#27183](https://github.com/ClickHouse/ClickHouse/pull/27183) ([tavplubix](https://github.com/tavplubix)).
-* Fix uninitialized memory in functions `multiSearch*` with an empty array, closes [#27169](https://github.com/ClickHouse/ClickHouse/issues/27169). [#27181](https://github.com/ClickHouse/ClickHouse/pull/27181) ([Vladimir C](https://github.com/vdimir)).
-* Fix synchronization in GRPCServer. This PR fixes [#27024](https://github.com/ClickHouse/ClickHouse/issues/27024). [#27064](https://github.com/ClickHouse/ClickHouse/pull/27064) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fixed the `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache` configuration parsing. The options `allow_read_expired_keys`, `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds` were not parsed for dictionaries with a non-`cache` type. [#27032](https://github.com/ClickHouse/ClickHouse/pull/27032) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix a possible stuck mutation due to a race with DROP_RANGE. [#27002](https://github.com/ClickHouse/ClickHouse/pull/27002) ([Azat Khuzhin](https://github.com/azat)).
-* Now the partition ID in queries like `ALTER TABLE ... PARTITION ID xxx` is validated for correctness. Fixes [#25718](https://github.com/ClickHouse/ClickHouse/issues/25718). [#26963](https://github.com/ClickHouse/ClickHouse/pull/26963) ([alesapin](https://github.com/alesapin)).
-* Fix the "Unknown column name" error with multiple JOINs in some cases, closes [#26899](https://github.com/ClickHouse/ClickHouse/issues/26899). [#26957](https://github.com/ClickHouse/ClickHouse/pull/26957) ([Vladimir C](https://github.com/vdimir)).
-* Fix reading of custom TLDs (processing stopped with a smaller buffer or a bigger file). [#26948](https://github.com/ClickHouse/ClickHouse/pull/26948) ([Azat Khuzhin](https://github.com/azat)).
-* Fix the error `Missing columns: 'xxx'` when a `DEFAULT` column references another non-materialized column without a `DEFAULT` expression. Fixes [#26591](https://github.com/ClickHouse/ClickHouse/issues/26591). [#26900](https://github.com/ClickHouse/ClickHouse/pull/26900) ([alesapin](https://github.com/alesapin)).
-* Fix loading of dictionary keys in `library-bridge` for the `library` dictionary source. [#26834](https://github.com/ClickHouse/ClickHouse/pull/26834) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Aggregate function parameters might be lost when applying some combinators, causing exceptions like `Conversion from AggregateFunction(topKArray, Array(String)) to AggregateFunction(topKArray(10), Array(String)) is not supported`. It's fixed (see the example below). Fixes [#26196](https://github.com/ClickHouse/ClickHouse/issues/26196) and [#26433](https://github.com/ClickHouse/ClickHouse/issues/26433). [#26814](https://github.com/ClickHouse/ClickHouse/pull/26814) ([tavplubix](https://github.com/tavplubix)).
-* Add the `event_time_microseconds` value for `REMOVE_PART` in `system.part_log`. In previous versions it was not set. [#26720](https://github.com/ClickHouse/ClickHouse/pull/26720) ([Azat Khuzhin](https://github.com/azat)).
-* Do not remove data on ReplicatedMergeTree table shutdown, to avoid creating a data-to-metadata inconsistency. [#26716](https://github.com/ClickHouse/ClickHouse/pull/26716) ([nvartolomei](https://github.com/nvartolomei)).
-* Sometimes `SET ROLE` could work incorrectly; this PR fixes that. [#26707](https://github.com/ClickHouse/ClickHouse/pull/26707) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Some fixes for parallel formatting (see [#26694](https://github.com/ClickHouse/ClickHouse/issues/26694)). [#26703](https://github.com/ClickHouse/ClickHouse/pull/26703) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix a potential nullptr dereference in window functions. This fixes [#25276](https://github.com/ClickHouse/ClickHouse/issues/25276). [#26668](https://github.com/ClickHouse/ClickHouse/pull/26668) ([Alexander Kuzmenkov](https://github.com/akuzm)).
-* Fix clickhouse-client history file conversion (when upgrading from the format of a 3-year-old version of clickhouse-client) if the file is empty. [#26589](https://github.com/ClickHouse/ClickHouse/pull/26589) ([Azat Khuzhin](https://github.com/azat)).
-* Fix incorrect function names of `groupBitmapAnd/Or/Xor` (they could be displayed incorrectly on some occasions). [#26557](https://github.com/ClickHouse/ClickHouse/pull/26557) ([Amos Bird](https://github.com/amosbird)).
-* Update the `chown` command check in the clickhouse-server Docker entrypoint. It fixes a bug where cluster pod restarts failed (or timed out) on Kubernetes. [#26545](https://github.com/ClickHouse/ClickHouse/pull/26545) ([Ky Li](https://github.com/Kylinrix)).
-* Fix a crash in `RabbitMQ` shutdown in case the `RabbitMQ` setup was not started. Closes [#26504](https://github.com/ClickHouse/ClickHouse/issues/26504). [#26529](https://github.com/ClickHouse/ClickHouse/pull/26529) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix issues with the `CREATE DICTIONARY` query if the dictionary name or database name was quoted. Closes [#26491](https://github.com/ClickHouse/ClickHouse/issues/26491). [#26508](https://github.com/ClickHouse/ClickHouse/pull/26508) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix broken column name resolution after rewriting column aliases. This fixes [#26432](https://github.com/ClickHouse/ClickHouse/issues/26432). [#26475](https://github.com/ClickHouse/ClickHouse/pull/26475) ([Amos Bird](https://github.com/amosbird)).
-* Fix some fuzzed MSan crashes. Fixes [#22517](https://github.com/ClickHouse/ClickHouse/issues/22517). [#26428](https://github.com/ClickHouse/ClickHouse/pull/26428) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix an infinite non-joined block stream in `partial_merge_join`, closes [#26325](https://github.com/ClickHouse/ClickHouse/issues/26325). [#26374](https://github.com/ClickHouse/ClickHouse/pull/26374) ([Vladimir C](https://github.com/vdimir)).
-* Fix a possible crash when logging in as a dropped user. This PR fixes [#26073](https://github.com/ClickHouse/ClickHouse/issues/26073). [#26363](https://github.com/ClickHouse/ClickHouse/pull/26363) ([Vitaly Baranov](https://github.com/vitlibar)).
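The combinator-parameter fix above (together with the stricter `-Merge` rule from the v21.9 backward incompatible changes) can be seen in a self-contained sketch: the parameter given to `-State` must be repeated verbatim for `-Merge`.

```sql
-- Build a parametric aggregate state, then finalize it: the parameter (2)
-- must match between topKState and topKMerge.
SELECT topKMerge(2)(state) AS top
FROM (SELECT topKState(2)(number % 3) AS state FROM numbers(100));
```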
-* Fix `optimize_distributed_group_by_sharding_key` for multiple columns (it led to incorrect results with `optimize_skip_unused_shards=1`/`allow_nondeterministic_optimize_skip_unused_shards=1` and multiple columns in the sharding key expression). [#26353](https://github.com/ClickHouse/ClickHouse/pull/26353) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed a rare bug in lost replica recovery that may cause replicas to diverge. [#26321](https://github.com/ClickHouse/ClickHouse/pull/26321) ([tavplubix](https://github.com/tavplubix)).
-* Fix zstd decompression (for import/export in the zstd framing format, which is unrelated to table data) in case there are escape sequences at the end of the internal buffer. Closes [#26013](https://github.com/ClickHouse/ClickHouse/issues/26013). [#26314](https://github.com/ClickHouse/ClickHouse/pull/26314) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix a logical error on join with totals, closes [#26017](https://github.com/ClickHouse/ClickHouse/issues/26017). [#26250](https://github.com/ClickHouse/ClickHouse/pull/26250) ([Vladimir C](https://github.com/vdimir)).
-* Remove the excessive newline in the `thread_name` column in the `system.stack_trace` table. This fixes [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix a potential crash if more than one `untuple` expression is used (see the example below). [#26179](https://github.com/ClickHouse/ClickHouse/pull/26179) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Don't throw an exception in `toString` for a Nullable Enum if the Enum does not have a value for zero, closes [#25806](https://github.com/ClickHouse/ClickHouse/issues/25806). [#26123](https://github.com/ClickHouse/ClickHouse/pull/26123) ([Vladimir C](https://github.com/vdimir)).
-* Fixed an incorrect `sequence_id` in MySQL protocol packets that ClickHouse sends on an exception during query execution. It might cause a MySQL client to reset the connection to the ClickHouse server. Fixes [#21184](https://github.com/ClickHouse/ClickHouse/issues/21184). [#26051](https://github.com/ClickHouse/ClickHouse/pull/26051) ([tavplubix](https://github.com/tavplubix)).
-* Fix the case where `cutToFirstSignificantSubdomainCustom()`/`cutToFirstSignificantSubdomainCustomWithWWW()`/`firstSignificantSubdomainCustom()` returned an incorrect type for consts, and hence `optimize_skip_unused_shards` did not work. [#26041](https://github.com/ClickHouse/ClickHouse/pull/26041) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a possible mismatched header when using a normal projection with `PREWHERE`. This fixes [#26020](https://github.com/ClickHouse/ClickHouse/issues/26020). [#26038](https://github.com/ClickHouse/ClickHouse/pull/26038) ([Amos Bird](https://github.com/amosbird)).
-* Fix `sharding_key` from a column without a function for `remote()` (before, `select * from remote('127.1', system.one, dummy)` led to a `Unknown column: dummy, there are only columns .` error). [#25824](https://github.com/ClickHouse/ClickHouse/pull/25824) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed `Not found column ...` and `Missing column ...` errors when selecting from `MaterializeMySQL`. Fixes [#23708](https://github.com/ClickHouse/ClickHouse/issues/23708), [#24830](https://github.com/ClickHouse/ClickHouse/issues/24830), [#25794](https://github.com/ClickHouse/ClickHouse/issues/25794). [#25822](https://github.com/ClickHouse/ClickHouse/pull/25822) ([tavplubix](https://github.com/tavplubix)).
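The `untuple` fix above is easy to reproduce in a sketch; with the fix, using more than one `untuple` expression in a single SELECT is safe:

```sql
-- Two untuple expressions in one query (the case that could crash before).
SELECT untuple((1, 'a')), untuple((2, 'b'));
```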
-* Fix `optimize_skip_unused_shards_rewrite_in` for non-UInt64 types (it could eventually select incorrect shards or throw `Cannot infer type of an empty tuple` or `Function tuple requires at least one argument`). [#25798](https://github.com/ClickHouse/ClickHouse/pull/25798) ([Azat Khuzhin](https://github.com/azat)).
-
-#### Build/Testing/Packaging Improvement
-
-* Now we run stateful and stateless tests in random timezones. Fixes [#12439](https://github.com/ClickHouse/ClickHouse/issues/12439). Reading String as DateTime and writing DateTime as String in the Protobuf format now respect the timezone. Reading UInt16 as DateTime in the Arrow and Parquet formats now treats it as Date and then converts it to DateTime with respect to DateTime's timezone, because Date is serialized in Arrow and Parquet as UInt16. GraphiteMergeTree now respects the time zone for rounding of times. Fixes [#5098](https://github.com/ClickHouse/ClickHouse/issues/5098). Author: @alexey-milovidov. [#15408](https://github.com/ClickHouse/ClickHouse/pull/15408) ([alesapin](https://github.com/alesapin)).
-* `clickhouse-test` supports SQL tests with [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/templates/#synopsis) templates. [#26579](https://github.com/ClickHouse/ClickHouse/pull/26579) ([Vladimir C](https://github.com/vdimir)).
-* Add support for building with `clang-13`. This closes [#27705](https://github.com/ClickHouse/ClickHouse/issues/27705). [#27714](https://github.com/ClickHouse/ClickHouse/pull/27714) ([alexey-milovidov](https://github.com/alexey-milovidov)). [#27777](https://github.com/ClickHouse/ClickHouse/pull/27777) ([Sergei Semin](https://github.com/syominsergey)).
-* Add CMake options to build with or without specific CPU instruction sets. This is for [#17469](https://github.com/ClickHouse/ClickHouse/issues/17469) and [#27509](https://github.com/ClickHouse/ClickHouse/issues/27509). [#27508](https://github.com/ClickHouse/ClickHouse/pull/27508) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix linking of auxiliary programs when using dynamic libraries. [#26958](https://github.com/ClickHouse/ClickHouse/pull/26958) ([Raúl Marín](https://github.com/Algunenano)).
-* Update RocksDB to the `2021-07-16` master. [#26411](https://github.com/ClickHouse/ClickHouse/pull/26411) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-
-
-### ClickHouse release v21.8, 2021-08-12
-
-#### Upgrade Notes
-* The new version uses the `Map` data type for system log tables (`system.query_log`, `system.query_thread_log`, `system.processes`, `system.opentelemetry_span_log`). These tables will be auto-created with the new data types. Virtual columns are created to support old queries (see the query example below). Closes [#18698](https://github.com/ClickHouse/ClickHouse/issues/18698). [#23934](https://github.com/ClickHouse/ClickHouse/pull/23934), [#25773](https://github.com/ClickHouse/ClickHouse/pull/25773) ([hexiaoting](https://github.com/hexiaoting), [sundy-li](https://github.com/sundy-li), [Maksim Kita](https://github.com/kitaisreal)). If you want to *downgrade* from version 21.8 to older versions, you will need to clean up the system log tables manually. Look at `/var/lib/clickhouse/data/system/*_log`.
-
-#### New Features
-
-* Add support for a part of the SQL/JSON standard. [#24148](https://github.com/ClickHouse/ClickHouse/pull/24148) ([l1tsolaiki](https://github.com/l1tsolaiki), [Kseniia Sumarokova](https://github.com/kssenii)).
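As a usage note for the `Map`-typed system log columns mentioned in the Upgrade Notes above, counters can now be addressed by key. A sketch (column and event names follow current documentation, so treat them as an assumption for 21.8):

```sql
-- ProfileEvents is now a single Map column, so individual counters can be
-- read with bracket access instead of the old paired-array columns.
SELECT query_duration_ms, ProfileEvents['SelectedRows'] AS selected_rows
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 5;
```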
-* Collect common system metrics (in `system.asynchronous_metrics` and `system.asynchronous_metric_log`) on CPU usage, disk usage, memory usage, IO, network, files, load average, CPU frequencies, thermal sensors, EDAC counters, and system uptime; also added metrics about the scheduling jitter and the time spent collecting the metrics. It works similarly to `atop`, but inside ClickHouse, and allows access to monitoring data even if you have no additional tools installed. Closes [#9430](https://github.com/ClickHouse/ClickHouse/issues/9430). [#24416](https://github.com/ClickHouse/ClickHouse/pull/24416) ([alexey-milovidov](https://github.com/alexey-milovidov), [Yegor Levankov](https://github.com/elevankoff)).
-* Add MaterializedPostgreSQL table engine and database engine. This database engine allows replicating a whole database or any subset of database tables. [#20470](https://github.com/ClickHouse/ClickHouse/pull/20470) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add new functions `leftPad()`, `rightPad()`, `leftPadUTF8()`, `rightPadUTF8()`. [#26075](https://github.com/ClickHouse/ClickHouse/pull/26075) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Add the `FIRST` keyword to the `ADD INDEX` command to be able to add the index at the beginning of the indices list. [#25904](https://github.com/ClickHouse/ClickHouse/pull/25904) ([xjewer](https://github.com/xjewer)).
-* Introduce the `system.data_skipping_indices` table containing information about existing data skipping indices. Closes [#7659](https://github.com/ClickHouse/ClickHouse/issues/7659). [#25693](https://github.com/ClickHouse/ClickHouse/pull/25693) ([Dmitry Novik](https://github.com/novikd)).
-* Add `bin`/`unbin` functions. [#25609](https://github.com/ClickHouse/ClickHouse/pull/25609) ([zhaoyu](https://github.com/zxc111)).
-* Support `Map` and `UInt128`, `Int128`, `UInt256`, `Int256` types in the `mapAdd` and `mapSubtract` functions. [#25596](https://github.com/ClickHouse/ClickHouse/pull/25596) ([Ildus Kurbangaliev](https://github.com/ildus)).
-* Support the `DISTINCT ON (columns)` expression (see the example below), closes [#25404](https://github.com/ClickHouse/ClickHouse/issues/25404). [#25589](https://github.com/ClickHouse/ClickHouse/pull/25589) ([Zijie Lu](https://github.com/TszKitLo40)).
-* Add an ability to reset a custom setting to default and remove it from the table's metadata. It allows rolling back the change without knowing the system/config's default. Closes [#14449](https://github.com/ClickHouse/ClickHouse/issues/14449). [#17769](https://github.com/ClickHouse/ClickHouse/pull/17769) ([xjewer](https://github.com/xjewer)).
-* Render pipelines as graphs in the Web UI if an `EXPLAIN PIPELINE graph = 1` query is submitted. [#26067](https://github.com/ClickHouse/ClickHouse/pull/26067) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-
-#### Performance Improvements
-
-* Compile aggregate functions. Use the option `compile_aggregate_expressions` to enable it. [#24789](https://github.com/ClickHouse/ClickHouse/pull/24789) ([Maksim Kita](https://github.com/kitaisreal)).
-* Improve latency of short queries that require reading from tables with many columns. [#26371](https://github.com/ClickHouse/ClickHouse/pull/26371) ([Anton Popov](https://github.com/CurtizJ)).
-
-#### Improvements
-
-* Use the `Map` data type for system log tables (`system.query_log`, `system.query_thread_log`, `system.processes`, `system.opentelemetry_span_log`). These tables will be auto-created with the new data types. Virtual columns are created to support old queries. Closes [#18698](https://github.com/ClickHouse/ClickHouse/issues/18698). [#23934](https://github.com/ClickHouse/ClickHouse/pull/23934), [#25773](https://github.com/ClickHouse/ClickHouse/pull/25773) ([hexiaoting](https://github.com/hexiaoting), [sundy-li](https://github.com/sundy-li), [Maksim Kita](https://github.com/kitaisreal)).
-* For a dictionary with a complex key containing only one attribute, allow not wrapping the key expression in a tuple for the functions `dictGet` and `dictHas`. [#26130](https://github.com/ClickHouse/ClickHouse/pull/26130) ([Maksim Kita](https://github.com/kitaisreal)).
-* Implement the functions `bin`/`hex` for `AggregateFunction` states. [#26094](https://github.com/ClickHouse/ClickHouse/pull/26094) ([zhaoyu](https://github.com/zxc111)).
-* Support arguments of the `UUID` type for the `empty` and `notEmpty` functions. A `UUID` is empty if it is all zeros (nil UUID). Closes [#3446](https://github.com/ClickHouse/ClickHouse/issues/3446). [#25974](https://github.com/ClickHouse/ClickHouse/pull/25974) ([zhaoyu](https://github.com/zxc111)).
-* Add support for `SET SQL_SELECT_LIMIT` in the MySQL protocol. Closes [#17115](https://github.com/ClickHouse/ClickHouse/issues/17115). [#25972](https://github.com/ClickHouse/ClickHouse/pull/25972) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* More instrumentation for network interaction: add counters for recv/send bytes and gauges for recvs/sends. Added missing documentation. Closes [#5897](https://github.com/ClickHouse/ClickHouse/issues/5897). [#25962](https://github.com/ClickHouse/ClickHouse/pull/25962) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add setting `optimize_move_to_prewhere_if_final`. If a query has `FINAL`, the optimization `move_to_prewhere` will be enabled only if both `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are enabled. Closes [#8684](https://github.com/ClickHouse/ClickHouse/issues/8684). [#25940](https://github.com/ClickHouse/ClickHouse/pull/25940) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Allow complex quoted identifiers of JOINed tables. Closes [#17861](https://github.com/ClickHouse/ClickHouse/issues/17861). [#25924](https://github.com/ClickHouse/ClickHouse/pull/25924) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add support for Unicode (e.g. Chinese, Cyrillic) components in `Nested` data types. Closes [#25594](https://github.com/ClickHouse/ClickHouse/issues/25594). [#25923](https://github.com/ClickHouse/ClickHouse/pull/25923) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Allow `quantiles*` functions to work with `aggregate_functions_null_for_empty`. Closes [#25892](https://github.com/ClickHouse/ClickHouse/issues/25892). [#25919](https://github.com/ClickHouse/ClickHouse/pull/25919) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Allow parameters for parametric aggregate functions to be arbitrary constant expressions (e.g., `1 + 2`), not just literals. This also allows using query parameters (in parameterized queries like `{param:UInt8}`) inside parametric aggregate functions. Closes [#11607](https://github.com/ClickHouse/ClickHouse/issues/11607). [#25910](https://github.com/ClickHouse/ClickHouse/pull/25910) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Correctly throw the exception on an attempt to parse an invalid `Date`. Closes [#6481](https://github.com/ClickHouse/ClickHouse/issues/6481). [#25909](https://github.com/ClickHouse/ClickHouse/pull/25909) ([alexey-milovidov](https://github.com/alexey-milovidov)).
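A couple of the v21.8 features above in action; a minimal sketch with inline data (which row `DISTINCT ON` keeps is whichever comes first in the chosen execution order, so an `ORDER BY` may be wanted in practice):

```sql
-- Pad a string on the left up to a total length of 7.
SELECT leftPad('abc', 7, '*') AS padded;  -- '****abc'

-- DISTINCT ON: keep one row per value of k; inline data via the values
-- table function, names and values are illustrative.
SELECT DISTINCT ON (k) k, v
FROM values('k Int32, v Int32', (1, 10), (1, 20), (2, 30));
```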
-* Support for multiple includes in configuration. It is possible to include users configuration and remote servers configuration from multiple sources. Simply place an `<include>` element with a `from_zk`, `from_env` or `incl` attribute, and it will be replaced with the substitution. [#24404](https://github.com/ClickHouse/ClickHouse/pull/24404) ([nvartolomei](https://github.com/nvartolomei)).
-* Support for queries with a column named `"null"` (it must be specified in back-ticks or double quotes) and `ON CLUSTER`. Closes [#24035](https://github.com/ClickHouse/ClickHouse/issues/24035). [#25907](https://github.com/ClickHouse/ClickHouse/pull/25907) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Support `LowCardinality`, `Decimal`, and `UUID` for `JSONExtract`; see the sketch below. Closes [#24606](https://github.com/ClickHouse/ClickHouse/issues/24606). [#25900](https://github.com/ClickHouse/ClickHouse/pull/25900) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Convert the history file from `readline` format to `replxx` format. [#25888](https://github.com/ClickHouse/ClickHouse/pull/25888) ([Azat Khuzhin](https://github.com/azat)).
-* Fix an issue which can lead to intersecting parts after `DROP PART` or background deletion of an empty part. [#25884](https://github.com/ClickHouse/ClickHouse/pull/25884) ([alesapin](https://github.com/alesapin)).
-* Better handling of lost parts for `ReplicatedMergeTree` tables. Fixes rare inconsistencies in `ReplicationQueue`. Fixes [#10368](https://github.com/ClickHouse/ClickHouse/issues/10368). [#25820](https://github.com/ClickHouse/ClickHouse/pull/25820) ([alesapin](https://github.com/alesapin)).
-* Allow starting clickhouse-client with an unreadable working directory. [#25817](https://github.com/ClickHouse/ClickHouse/pull/25817) ([ianton-ru](https://github.com/ianton-ru)).
-* Fix "No available columns" error for `Merge` storage. [#25801](https://github.com/ClickHouse/ClickHouse/pull/25801) ([Azat Khuzhin](https://github.com/azat)).
-* MySQL Engine now supports the exchange of column comments between MySQL and ClickHouse. [#25795](https://github.com/ClickHouse/ClickHouse/pull/25795) ([Storozhuk Kostiantyn](https://github.com/sand6255)).
-* Fix inconsistent behaviour of `GROUP BY` constant on an empty set. Closes [#6842](https://github.com/ClickHouse/ClickHouse/issues/6842). [#25786](https://github.com/ClickHouse/ClickHouse/pull/25786) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Cancel already running merges in a partition on `DROP PARTITION` and `TRUNCATE` for `ReplicatedMergeTree`. Resolves [#17151](https://github.com/ClickHouse/ClickHouse/issues/17151). [#25684](https://github.com/ClickHouse/ClickHouse/pull/25684) ([tavplubix](https://github.com/tavplubix)).
-* Support `ENUM` data type for MaterializeMySQL. [#25676](https://github.com/ClickHouse/ClickHouse/pull/25676) ([Storozhuk Kostiantyn](https://github.com/sand6255)).
-* Support materialized and aliased columns in JOIN, close [#13274](https://github.com/ClickHouse/ClickHouse/issues/13274). [#25634](https://github.com/ClickHouse/ClickHouse/pull/25634) ([Vladimir C](https://github.com/vdimir)).
-* Fix possible logical race condition between `ALTER TABLE ... DETACH` and background merges. [#25605](https://github.com/ClickHouse/ClickHouse/pull/25605) ([Azat Khuzhin](https://github.com/azat)).
-* Make the `NetworkReceiveElapsedMicroseconds` metric correctly include the time spent waiting for data from the client during `INSERT`. Close [#9958](https://github.com/ClickHouse/ClickHouse/issues/9958). [#25602](https://github.com/ClickHouse/ClickHouse/pull/25602) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Support `TRUNCATE TABLE` for S3 and HDFS. Close [#25530](https://github.com/ClickHouse/ClickHouse/issues/25530). [#25550](https://github.com/ClickHouse/ClickHouse/pull/25550) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Support for dynamic reloading of the config to change the number of threads in the pool for background job execution (merges, mutations, fetches). [#25548](https://github.com/ClickHouse/ClickHouse/pull/25548) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Allow extracting a non-string element as a string using `JSONExtract`. This is for [#25414](https://github.com/ClickHouse/ClickHouse/issues/25414). [#25452](https://github.com/ClickHouse/ClickHouse/pull/25452) ([Amos Bird](https://github.com/amosbird)).
-* Support a regular expression in the `Database` argument for `StorageMerge`. Close [#776](https://github.com/ClickHouse/ClickHouse/issues/776). [#25064](https://github.com/ClickHouse/ClickHouse/pull/25064) ([flynn](https://github.com/ucasfl)).
-* Web UI: if the value looks like a URL, automatically generate a link. [#25965](https://github.com/ClickHouse/ClickHouse/pull/25965) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Make `sudo service clickhouse-server start` work on systems with `systemd`, like CentOS 8. Close [#14298](https://github.com/ClickHouse/ClickHouse/issues/14298). Close [#17799](https://github.com/ClickHouse/ClickHouse/issues/17799). [#25921](https://github.com/ClickHouse/ClickHouse/pull/25921) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-
-#### Bug Fixes
-
-* Fix incorrect `SET ROLE` in some cases. [#26707](https://github.com/ClickHouse/ClickHouse/pull/26707) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fix potential `nullptr` dereference in window functions. Fix [#25276](https://github.com/ClickHouse/ClickHouse/issues/25276). [#26668](https://github.com/ClickHouse/ClickHouse/pull/26668) ([Alexander Kuzmenkov](https://github.com/akuzm)).
-* Fix incorrect function names of `groupBitmapAnd/Or/Xor`. Fix [#26557](https://github.com/ClickHouse/ClickHouse/pull/26557) ([Amos Bird](https://github.com/amosbird)).
-* Fix crash on RabbitMQ shutdown in case the RabbitMQ setup was not started. Closes [#26504](https://github.com/ClickHouse/ClickHouse/issues/26504). [#26529](https://github.com/ClickHouse/ClickHouse/pull/26529) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix issues with the `CREATE DICTIONARY` query if the dictionary name or database name was quoted. Closes [#26491](https://github.com/ClickHouse/ClickHouse/issues/26491). [#26508](https://github.com/ClickHouse/ClickHouse/pull/26508) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix broken name resolution after rewriting column aliases. Fix [#26432](https://github.com/ClickHouse/ClickHouse/issues/26432). [#26475](https://github.com/ClickHouse/ClickHouse/pull/26475) ([Amos Bird](https://github.com/amosbird)).
-* Fix infinite non-joined block stream in `partial_merge_join`, close [#26325](https://github.com/ClickHouse/ClickHouse/issues/26325). [#26374](https://github.com/ClickHouse/ClickHouse/pull/26374) ([Vladimir C](https://github.com/vdimir)).
-* Fix possible crash when logging in as a dropped user. Fix [#26073](https://github.com/ClickHouse/ClickHouse/issues/26073). [#26363](https://github.com/ClickHouse/ClickHouse/pull/26363) ([Vitaly Baranov](https://github.com/vitlibar)).
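As a small illustration of the extended `JSONExtract` above, a hedged sketch (the JSON documents and target types are made up for the example):

```sql
SELECT
    JSONExtract('{"price": 1.25}', 'price', 'Decimal(10, 2)') AS price,
    JSONExtract('{"id": "61f0c404-5cb3-11e7-907b-a6006ad3dba0"}', 'id', 'UUID') AS id,
    JSONExtract('{"tag": "red"}', 'tag', 'LowCardinality(String)') AS tag;
```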
-* Fix `optimize_distributed_group_by_sharding_key` for multiple columns (it led to incorrect results with `optimize_skip_unused_shards=1`/`allow_nondeterministic_optimize_skip_unused_shards=1` and multiple columns in the sharding key expression). [#26353](https://github.com/ClickHouse/ClickHouse/pull/26353) ([Azat Khuzhin](https://github.com/azat)).
-* `CAST` from `Date` to `DateTime` (or `DateTime64`) was not using the timezone of the `DateTime` type. It can also affect the comparison between `Date` and `DateTime`. Inference of the common type for `Date` and `DateTime` was also not using the corresponding timezone. It affected the results of the function `if` and of array construction; see the sketch below. Closes [#24128](https://github.com/ClickHouse/ClickHouse/issues/24128). [#24129](https://github.com/ClickHouse/ClickHouse/pull/24129) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fixed a rare bug in lost replica recovery that could cause replicas to diverge. [#26321](https://github.com/ClickHouse/ClickHouse/pull/26321) ([tavplubix](https://github.com/tavplubix)).
-* Fix zstd decompression in case there are escape sequences at the end of the internal buffer. Closes [#26013](https://github.com/ClickHouse/ClickHouse/issues/26013). [#26314](https://github.com/ClickHouse/ClickHouse/pull/26314) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix logical error on join with totals, close [#26017](https://github.com/ClickHouse/ClickHouse/issues/26017). [#26250](https://github.com/ClickHouse/ClickHouse/pull/26250) ([Vladimir C](https://github.com/vdimir)).
-* Remove excessive newline in the `thread_name` column in the `system.stack_trace` table. Fix [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix `joinGet` with `LowCardinality` columns, close [#25993](https://github.com/ClickHouse/ClickHouse/issues/25993). [#26118](https://github.com/ClickHouse/ClickHouse/pull/26118) ([Vladimir C](https://github.com/vdimir)).
-* Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix throwing an exception when iterating over a non-existent remote directory. [#26087](https://github.com/ClickHouse/ClickHouse/pull/26087) ([ianton-ru](https://github.com/ianton-ru)).
-* Fix a rare server crash caused by `abort` in the ZooKeeper client. Fixes [#25813](https://github.com/ClickHouse/ClickHouse/issues/25813). [#26079](https://github.com/ClickHouse/ClickHouse/pull/26079) ([alesapin](https://github.com/alesapin)).
-* Fix wrong thread count estimation for right subquery join in some cases. Close [#24075](https://github.com/ClickHouse/ClickHouse/issues/24075). [#26052](https://github.com/ClickHouse/ClickHouse/pull/26052) ([Vladimir C](https://github.com/vdimir)).
-* Fixed incorrect `sequence_id` in MySQL protocol packets that ClickHouse sends on exception during query execution. It might cause the MySQL client to reset the connection to the ClickHouse server. Fixes [#21184](https://github.com/ClickHouse/ClickHouse/issues/21184). [#26051](https://github.com/ClickHouse/ClickHouse/pull/26051) ([tavplubix](https://github.com/tavplubix)).
-* Fix possible mismatched header when using a normal projection with `PREWHERE`. Fix [#26020](https://github.com/ClickHouse/ClickHouse/issues/26020). [#26038](https://github.com/ClickHouse/ClickHouse/pull/26038) ([Amos Bird](https://github.com/amosbird)).
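A hedged sketch of the timezone behaviour addressed by the `CAST` fix above (the date and timezone are arbitrary examples):

```sql
-- The timezone of the target DateTime type is now respected:
SELECT CAST(toDate('2021-07-01') AS DateTime('Asia/Istanbul'));

-- Comparison between Date and DateTime infers the common type
-- using the corresponding timezone:
SELECT toDate('2021-07-01') = toDateTime('2021-07-01 00:00:00', 'Asia/Istanbul');
```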
-* Fix formatting of type `Map` with integer keys to `JSON`. [#25982](https://github.com/ClickHouse/ClickHouse/pull/25982) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix possible deadlock during query profiler stack unwinding. Fix [#25968](https://github.com/ClickHouse/ClickHouse/issues/25968). [#25970](https://github.com/ClickHouse/ClickHouse/pull/25970) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix crash when calling `dictGet()` with bad arguments. [#25913](https://github.com/ClickHouse/ClickHouse/pull/25913) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fixed `scram-sha-256` authentication for PostgreSQL engines. Closes [#24516](https://github.com/ClickHouse/ClickHouse/issues/24516). [#25906](https://github.com/ClickHouse/ClickHouse/pull/25906) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix extremely long backoff for background tasks when the background pool is full. Fixes [#25836](https://github.com/ClickHouse/ClickHouse/issues/25836). [#25893](https://github.com/ClickHouse/ClickHouse/pull/25893) ([alesapin](https://github.com/alesapin)).
-* Fix ARM exception handling with non-default page size. Fixes [#25512](https://github.com/ClickHouse/ClickHouse/issues/25512), [#25044](https://github.com/ClickHouse/ClickHouse/issues/25044), [#24901](https://github.com/ClickHouse/ClickHouse/issues/24901), [#23183](https://github.com/ClickHouse/ClickHouse/issues/23183), [#20221](https://github.com/ClickHouse/ClickHouse/issues/20221), [#19703](https://github.com/ClickHouse/ClickHouse/issues/19703), [#19028](https://github.com/ClickHouse/ClickHouse/issues/19028), [#18391](https://github.com/ClickHouse/ClickHouse/issues/18391), [#18121](https://github.com/ClickHouse/ClickHouse/issues/18121), [#17994](https://github.com/ClickHouse/ClickHouse/issues/17994), [#12483](https://github.com/ClickHouse/ClickHouse/issues/12483). [#25854](https://github.com/ClickHouse/ClickHouse/pull/25854) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix sharding_key taken from a column without a function for `remote()` (previously `select * from remote('127.1', system.one, dummy)` led to an `Unknown column: dummy, there are only columns .` error). [#25824](https://github.com/ClickHouse/ClickHouse/pull/25824) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed `Not found column ...` and `Missing column ...` errors when selecting from `MaterializeMySQL`. Fixes [#23708](https://github.com/ClickHouse/ClickHouse/issues/23708), [#24830](https://github.com/ClickHouse/ClickHouse/issues/24830), [#25794](https://github.com/ClickHouse/ClickHouse/issues/25794). [#25822](https://github.com/ClickHouse/ClickHouse/pull/25822) ([tavplubix](https://github.com/tavplubix)).
-* Fix `optimize_skip_unused_shards_rewrite_in` for non-UInt64 types (it could eventually select incorrect shards or throw `Cannot infer type of an empty tuple` or `Function tuple requires at least one argument`). [#25798](https://github.com/ClickHouse/ClickHouse/pull/25798) ([Azat Khuzhin](https://github.com/azat)).
-* Fix a rare bug with the `DROP PART` query for `ReplicatedMergeTree` tables which could lead to the error message `Unexpected merged part intersecting drop range`. [#25783](https://github.com/ClickHouse/ClickHouse/pull/25783) ([alesapin](https://github.com/alesapin)).
-* Fix a bug in `TTL` with a `GROUP BY` expression which prevented `TTL` from being executed again after its first execution on a part. [#25743](https://github.com/ClickHouse/ClickHouse/pull/25743) ([alesapin](https://github.com/alesapin)).
-* Allow StorageMerge to access tables with aliases. Closes [#6051](https://github.com/ClickHouse/ClickHouse/issues/6051). [#25694](https://github.com/ClickHouse/ClickHouse/pull/25694) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix slow dictionary join in some cases, close [#24209](https://github.com/ClickHouse/ClickHouse/issues/24209). [#25618](https://github.com/ClickHouse/ClickHouse/pull/25618) ([Vladimir C](https://github.com/vdimir)).
-* Fix `ALTER MODIFY COLUMN` for columns which participate in TTL expressions. [#25554](https://github.com/ClickHouse/ClickHouse/pull/25554) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix assertion in `PREWHERE` with non-UInt8 type, close [#19589](https://github.com/ClickHouse/ClickHouse/issues/19589). [#25484](https://github.com/ClickHouse/ClickHouse/pull/25484) ([Vladimir C](https://github.com/vdimir)).
-* Fix a fuzzed MSan crash. Fixes [#22517](https://github.com/ClickHouse/ClickHouse/issues/22517). [#26428](https://github.com/ClickHouse/ClickHouse/pull/26428) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Update the `chown` command check in the `clickhouse-server` Docker entrypoint. It fixes the error 'cluster pod restart failed (or timeout)' on Kubernetes. [#26545](https://github.com/ClickHouse/ClickHouse/pull/26545) ([Ky Li](https://github.com/Kylinrix)).
-
-
-### ClickHouse release v21.7, 2021-07-09
-
-#### Backward Incompatible Change
-
-* Improved performance of queries with explicitly defined large sets. Added compatibility setting `legacy_column_name_of_tuple_literal`. It makes sense to set it to `true` while doing a rolling update of a cluster from a version lower than 21.7 to any higher version. Otherwise distributed queries with explicitly defined sets in the `IN` clause may fail during the update. [#25371](https://github.com/ClickHouse/ClickHouse/pull/25371) ([Anton Popov](https://github.com/CurtizJ)).
-* Forward/backward incompatible change of the maximum buffer size in clickhouse-keeper (an experimental alternative to ZooKeeper). Better to do it now (before production), than later. [#25421](https://github.com/ClickHouse/ClickHouse/pull/25421) ([alesapin](https://github.com/alesapin)).
-
-#### New Feature
-
-* Support configuration in YAML format as an alternative to XML. This closes [#3607](https://github.com/ClickHouse/ClickHouse/issues/3607). [#21858](https://github.com/ClickHouse/ClickHouse/pull/21858) ([BoloniniD](https://github.com/BoloniniD)).
-* Provide a way to restore a replicated table when the data is (possibly) present but the ZooKeeper metadata is lost. Resolves [#13458](https://github.com/ClickHouse/ClickHouse/issues/13458). [#13652](https://github.com/ClickHouse/ClickHouse/pull/13652) ([Mike Kot](https://github.com/myrrc)).
-* Support structs and maps in Arrow/Parquet/ORC and dictionaries in Arrow input/output formats. Introduces the new setting `output_format_arrow_low_cardinality_as_dictionary`. [#24341](https://github.com/ClickHouse/ClickHouse/pull/24341) ([Kruglov Pavel](https://github.com/Avogar)).
-* Added support for `Array` type in dictionaries. [#25119](https://github.com/ClickHouse/ClickHouse/pull/25119) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added function `bitPositionsToArray`. Closes [#23792](https://github.com/ClickHouse/ClickHouse/issues/23792). Author: Kevin Wan (@MaxWk). [#25394](https://github.com/ClickHouse/ClickHouse/pull/25394) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added function `dateName` to return names like 'Friday' or 'April'. Author: Daniil Kondratyev (@dankondr). [#25372](https://github.com/ClickHouse/ClickHouse/pull/25372) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add `toJSONString` function to serialize columns to their JSON representations. [#25164](https://github.com/ClickHouse/ClickHouse/pull/25164) ([Amos Bird](https://github.com/amosbird)).
-* Now `query_log` has two new columns: `initial_query_start_time`, `initial_query_start_time_microsecond`, which record the starting time of a distributed query, if any. [#25022](https://github.com/ClickHouse/ClickHouse/pull/25022) ([Amos Bird](https://github.com/amosbird)).
-* Add aggregate function `segmentLengthSum`. [#24250](https://github.com/ClickHouse/ClickHouse/pull/24250) ([flynn](https://github.com/ucasfl)).
-* Add a new boolean setting `prefer_global_in_and_join` which makes all `IN`/`JOIN` operators `GLOBAL IN`/`GLOBAL JOIN` by default. [#23434](https://github.com/ClickHouse/ClickHouse/pull/23434) ([Amos Bird](https://github.com/amosbird)).
-* Support `ALTER DELETE` queries for `Join` table engine. [#23260](https://github.com/ClickHouse/ClickHouse/pull/23260) ([foolchi](https://github.com/foolchi)).
-* Add the `quantileBFloat16` aggregate function, as well as the corresponding `quantilesBFloat16` and `medianBFloat16`. It is a very simple and fast quantile estimator with a relative error of no more than 0.390625%. This closes [#16641](https://github.com/ClickHouse/ClickHouse/issues/16641). [#23204](https://github.com/ClickHouse/ClickHouse/pull/23204) ([Ivan Novitskiy](https://github.com/RedClusive)).
-* Implement the `sequenceNextNode()` function, useful for flow analysis. [#19766](https://github.com/ClickHouse/ClickHouse/pull/19766) ([achimbab](https://github.com/achimbab)).
-
-#### Experimental Feature
-
-* Add support for virtual filesystem over HDFS. [#11058](https://github.com/ClickHouse/ClickHouse/pull/11058) ([overshov](https://github.com/overshov)) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Now clickhouse-keeper (an experimental alternative to ZooKeeper) supports ZooKeeper-like `digest` ACLs. [#24448](https://github.com/ClickHouse/ClickHouse/pull/24448) ([alesapin](https://github.com/alesapin)).
-
-#### Performance Improvement
-
-* Added an optimization that transforms some functions into reading of subcolumns to reduce the amount of read data. E.g., the statement `col IS NULL` is transformed into reading the subcolumn `col.null`. The optimization can be enabled by the setting `optimize_functions_to_subcolumns`, which is currently off by default. [#24406](https://github.com/ClickHouse/ClickHouse/pull/24406) ([Anton Popov](https://github.com/CurtizJ)).
-* Rewrite more columns to possible alias expressions. This may enable better optimizations, such as projections. [#24405](https://github.com/ClickHouse/ClickHouse/pull/24405) ([Amos Bird](https://github.com/amosbird)).
-* An index of type `bloom_filter` can be used for expressions with the `hasAny` function with constant arrays. This closes [#24291](https://github.com/ClickHouse/ClickHouse/issues/24291). [#24900](https://github.com/ClickHouse/ClickHouse/pull/24900) ([Vasily Nemkov](https://github.com/Enmk)).
-* Add exponential backoff to reschedule the read attempt in case RabbitMQ queues are empty. (ClickHouse has support for importing data from RabbitMQ.) Closes [#24340](https://github.com/ClickHouse/ClickHouse/issues/24340). [#24415](https://github.com/ClickHouse/ClickHouse/pull/24415) ([Kseniia Sumarokova](https://github.com/kssenii)).
-
-#### Improvement
-
-* Allow limiting bandwidth for replication. Add two Replicated\*MergeTree settings: `max_replicated_fetches_network_bandwidth` and `max_replicated_sends_network_bandwidth`, which allow limiting the maximum speed of replicated fetches/sends for a table (see the sketch after this list). Add two server-wide settings (in the `default` user profile): `max_replicated_fetches_network_bandwidth_for_server` and `max_replicated_sends_network_bandwidth_for_server`, which limit the maximum speed of replication for all tables. The limits are not enforced perfectly accurately. Turned off by default. Fixes [#1821](https://github.com/ClickHouse/ClickHouse/issues/1821). [#24573](https://github.com/ClickHouse/ClickHouse/pull/24573) ([alesapin](https://github.com/alesapin)).
-* Resource constraints and isolation for ODBC and Library bridges. Use a separate `clickhouse-bridge` group and user for bridge processes. Set `oom_score_adj` so the bridges will be the first subjects for the OOM killer. Set maximum RSS to 1 GiB. Closes [#23861](https://github.com/ClickHouse/ClickHouse/issues/23861). [#25280](https://github.com/ClickHouse/ClickHouse/pull/25280) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add a standalone `clickhouse-keeper` symlink to the main `clickhouse` binary. Now it's possible to run coordination without the main clickhouse server. [#24059](https://github.com/ClickHouse/ClickHouse/pull/24059) ([alesapin](https://github.com/alesapin)).
-* Use global settings for queries to `VIEW`. Fixed the behavior when queries to `VIEW` used local settings, which led to errors if the settings on `CREATE VIEW` and `SELECT` were different. From now on, `VIEW` won't use these modified settings, but you can still pass additional settings in the `SETTINGS` section of the `CREATE VIEW` query. Close [#20551](https://github.com/ClickHouse/ClickHouse/issues/20551). [#24095](https://github.com/ClickHouse/ClickHouse/pull/24095) ([Vladimir](https://github.com/vdimir)).
-* On server start, parts with an incorrect partition ID are never removed, but always detached. [#25070](https://github.com/ClickHouse/ClickHouse/issues/25070). [#25166](https://github.com/ClickHouse/ClickHouse/pull/25166) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Increase the size of the background schedule pool to 128 (`background_schedule_pool_size` setting). It helps avoid the replication queue hanging on a slow ZooKeeper connection. [#25072](https://github.com/ClickHouse/ClickHouse/pull/25072) ([alesapin](https://github.com/alesapin)).
-* Add the merge tree setting `max_parts_to_merge_at_once` which limits the number of parts that can be merged in the background at once. Doesn't affect the `OPTIMIZE FINAL` query. Fixes [#1820](https://github.com/ClickHouse/ClickHouse/issues/1820). [#24496](https://github.com/ClickHouse/ClickHouse/pull/24496) ([alesapin](https://github.com/alesapin)).
-* Allow the `NOT IN` operator to be used in partition pruning. [#24894](https://github.com/ClickHouse/ClickHouse/pull/24894) ([Amos Bird](https://github.com/amosbird)).
-* Recognize IPv4 addresses like `127.0.1.1` as local. This is controversial and closes [#23504](https://github.com/ClickHouse/ClickHouse/issues/23504). Michael Filimonov will test this feature. [#24316](https://github.com/ClickHouse/ClickHouse/pull/24316) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* A ClickHouse database created with MaterializeMySQL (an experimental feature) now contains all column comments from the materialized MySQL database. [#25199](https://github.com/ClickHouse/ClickHouse/pull/25199) ([Storozhuk Kostiantyn](https://github.com/sand6255)).
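A hedged sketch of the per-table replication bandwidth limit referenced above (the table definition, ZooKeeper path and limit value are illustrative; the value is assumed to be in bytes per second):

```sql
CREATE TABLE events
(
    key UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events', '{replica}')
ORDER BY key
SETTINGS max_replicated_fetches_network_bandwidth = 10000000; -- assumed bytes/sec (~10 MB/s)
```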
-* Add settings (`connection_auto_close`/`connection_max_tries`/`connection_pool_size`) for the MySQL storage engine. [#24146](https://github.com/ClickHouse/ClickHouse/pull/24146) ([Azat Khuzhin](https://github.com/azat)).
-* Improve startup time of the Distributed engine. [#25663](https://github.com/ClickHouse/ClickHouse/pull/25663) ([Azat Khuzhin](https://github.com/azat)).
-* Improvement for Distributed tables. Drop replicas from the dirname for `internal_replication=true` (allows `INSERT` into Distributed with a cluster of any number of replicas; previously only 15 replicas were supported, and anything more would fail with `ENAMETOOLONG` while creating the directory for async blocks). [#25513](https://github.com/ClickHouse/ClickHouse/pull/25513) ([Azat Khuzhin](https://github.com/azat)).
-* Added support for the `Interval` type in `LowCardinality`. It is needed for intermediate values of some expressions. Closes [#21730](https://github.com/ClickHouse/ClickHouse/issues/21730). [#25410](https://github.com/ClickHouse/ClickHouse/pull/25410) ([Vladimir](https://github.com/vdimir)).
-* Add the `==` operator on time conditions for the `sequenceMatch` and `sequenceCount` functions. E.g.: `sequenceMatch('(?1)(?t==1)(?2)')(time, data = 1, data = 2)`. [#25299](https://github.com/ClickHouse/ClickHouse/pull/25299) ([Christophe Kalenzaga](https://github.com/mga-chka)).
-* Add settings `http_max_fields`, `http_max_field_name_size`, `http_max_field_value_size`. [#25296](https://github.com/ClickHouse/ClickHouse/pull/25296) ([Ivan](https://github.com/abyss7)).
-* Add support for the function `if` with `Decimal` and `Int` types on its branches. This closes [#20549](https://github.com/ClickHouse/ClickHouse/issues/20549). This closes [#10142](https://github.com/ClickHouse/ClickHouse/issues/10142). [#25283](https://github.com/ClickHouse/ClickHouse/pull/25283) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Update the prompt in `clickhouse-client` and display a message when reconnecting. This closes [#10577](https://github.com/ClickHouse/ClickHouse/issues/10577). [#25281](https://github.com/ClickHouse/ClickHouse/pull/25281) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Correct memory tracking in the aggregate function `topK`. This closes [#25259](https://github.com/ClickHouse/ClickHouse/issues/25259). [#25260](https://github.com/ClickHouse/ClickHouse/pull/25260) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix `topLevelDomain` for IDN hosts (i.e. `example.рф`); previously it returned an empty string for such hosts. [#25103](https://github.com/ClickHouse/ClickHouse/pull/25103) ([Azat Khuzhin](https://github.com/azat)).
-* Detect the Linux kernel version at runtime (working nested epoll is required for `async_socket_for_remote`/`use_hedged_requests`; otherwise remote queries may get stuck). [#25067](https://github.com/ClickHouse/ClickHouse/pull/25067) ([Azat Khuzhin](https://github.com/azat)).
-* For a distributed query with `optimize_skip_unused_shards=1`, allow skipping a shard with a condition like `(sharding key) IN (one-element-tuple)`. (Tuples with many elements were already supported; a tuple with a single element did not work because it is parsed as a literal.) [#24930](https://github.com/ClickHouse/ClickHouse/pull/24930) ([Amos Bird](https://github.com/amosbird)).
-* Improved log messages of S3 errors; no more double whitespace in case of empty keys and buckets. [#24897](https://github.com/ClickHouse/ClickHouse/pull/24897) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Some queries require multi-pass semantic analysis. Try reusing built sets for `IN` in this case. [#24874](https://github.com/ClickHouse/ClickHouse/pull/24874) ([Amos Bird](https://github.com/amosbird)).
-* Respect `max_distributed_connections` for `insert_distributed_sync` (otherwise for huge clusters and sync insert it may run out of `max_thread_pool_size`). [#24754](https://github.com/ClickHouse/ClickHouse/pull/24754) ([Azat Khuzhin](https://github.com/azat)).
-* Avoid hiding errors like `Limit for rows or bytes to read exceeded` for scalar subqueries. [#24545](https://github.com/ClickHouse/ClickHouse/pull/24545) ([nvartolomei](https://github.com/nvartolomei)).
-* Make the String-to-Int parser stricter so that `toInt64('+')` will throw. [#24475](https://github.com/ClickHouse/ClickHouse/pull/24475) ([Amos Bird](https://github.com/amosbird)).
-* If `SSD_CACHE` is created with a DDL query, it can be created only inside the `user_files` directory. [#24466](https://github.com/ClickHouse/ClickHouse/pull/24466) ([Maksim Kita](https://github.com/kitaisreal)).
-* PostgreSQL support for specifying a non-default schema for insert queries. Closes [#24149](https://github.com/ClickHouse/ClickHouse/issues/24149). [#24413](https://github.com/ClickHouse/ClickHouse/pull/24413) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix IPv6 address resolution (i.e. fixes `select * from remote('[::1]', system.one)`). [#24319](https://github.com/ClickHouse/ClickHouse/pull/24319) ([Azat Khuzhin](https://github.com/azat)).
-* Fix trailing whitespace in the `FROM` clause with subqueries in multiline mode; this also changes the output of queries slightly, in a more human-friendly way. [#24151](https://github.com/ClickHouse/ClickHouse/pull/24151) ([Azat Khuzhin](https://github.com/azat)).
-* Improvement for Distributed tables. Add the ability to split a distributed batch on failures (e.g. due to memory limits or corruption), under `distributed_directory_monitor_split_batch_on_failure` (OFF by default). [#23864](https://github.com/ClickHouse/ClickHouse/pull/23864) ([Azat Khuzhin](https://github.com/azat)).
-* Handle column name clashes for the `Join` table engine. Closes [#20309](https://github.com/ClickHouse/ClickHouse/issues/20309). [#23769](https://github.com/ClickHouse/ClickHouse/pull/23769) ([Vladimir](https://github.com/vdimir)).
-* Display progress for the `File` table engine in `clickhouse-local` and on `INSERT` queries in `clickhouse-client` when data is passed via stdin. Closes [#18209](https://github.com/ClickHouse/ClickHouse/issues/18209). [#23656](https://github.com/ClickHouse/ClickHouse/pull/23656) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Bugfixes and improvements of `clickhouse-copier`. Allow copying tables with different (but compatible) schemas. Closes [#9159](https://github.com/ClickHouse/ClickHouse/issues/9159). Added a test for copying a ReplacingMergeTree table. Closes [#22711](https://github.com/ClickHouse/ClickHouse/issues/22711). Support TTL on columns and Data Skipping Indices: they are simply removed when creating the internal Distributed table (the underlying table will keep the TTL and skipping indices). Closes [#19384](https://github.com/ClickHouse/ClickHouse/issues/19384). Allow copying MATERIALIZED and ALIAS columns. There are some cases in which it could be helpful (e.g. if this column is in the PRIMARY KEY). Now it can be enabled by setting the `allow_to_copy_alias_and_materialized_columns` property to true in the task configuration. Closes [#9177](https://github.com/ClickHouse/ClickHouse/issues/9177). Closes [#11007](https://github.com/ClickHouse/ClickHouse/issues/11007). Closes [#9514](https://github.com/ClickHouse/ClickHouse/issues/9514). Added a property `allow_to_drop_target_partitions` to the task configuration to drop the partition in the original table before moving helping tables. Closes [#20957](https://github.com/ClickHouse/ClickHouse/issues/20957). Get rid of the `OPTIMIZE DEDUPLICATE` query. This hack was needed because `ALTER TABLE MOVE PARTITION` was retried many times and plain MergeTree tables don't have deduplication. Closes [#17966](https://github.com/ClickHouse/ClickHouse/issues/17966). Write progress to the ZooKeeper node on the path `task_path + /status` in JSON format. Closes [#20955](https://github.com/ClickHouse/ClickHouse/issues/20955). Support for ReplicatedTables without arguments. Closes [#24834](https://github.com/ClickHouse/ClickHouse/issues/24834). [#23518](https://github.com/ClickHouse/ClickHouse/pull/23518) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Added sleep with backoff between read retries from S3. [#23461](https://github.com/ClickHouse/ClickHouse/pull/23461) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Respect `insert_allow_materialized_columns` (allows materialized columns) for `INSERT` into a `Distributed` table. [#23349](https://github.com/ClickHouse/ClickHouse/pull/23349) ([Azat Khuzhin](https://github.com/azat)).
-* Add the ability to push down `LIMIT` for distributed queries. [#23027](https://github.com/ClickHouse/ClickHouse/pull/23027) ([Azat Khuzhin](https://github.com/azat)).
-* Fix zero-copy replication with several S3 volumes (fixes [#22679](https://github.com/ClickHouse/ClickHouse/issues/22679)). [#22864](https://github.com/ClickHouse/ClickHouse/pull/22864) ([ianton-ru](https://github.com/ianton-ru)).
-* Resolve the actual port number bound when a user requests any available port from the operating system, to show it in the log message. [#25569](https://github.com/ClickHouse/ClickHouse/pull/25569) ([bnaecker](https://github.com/bnaecker)).
-* Fixed a case when conversion of PostgreSQL arrays sometimes resulted in the String data type instead of an n-dimensional array, because `attndims` works incorrectly in some cases. Closes [#24804](https://github.com/ClickHouse/ClickHouse/issues/24804). [#25538](https://github.com/ClickHouse/ClickHouse/pull/25538) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix conversion of DateTime with timezone for MySQL, PostgreSQL, ODBC. Closes [#5057](https://github.com/ClickHouse/ClickHouse/issues/5057). [#25528](https://github.com/ClickHouse/ClickHouse/pull/25528) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Distinguish `KILL MUTATION` for different tables (fixes an unexpected `Cancelled mutating parts` error). [#25025](https://github.com/ClickHouse/ClickHouse/pull/25025) ([Azat Khuzhin](https://github.com/azat)).
-* Allow declaring an S3 disk at the root of a bucket (the S3 virtual filesystem is an experimental feature under development). [#24898](https://github.com/ClickHouse/ClickHouse/pull/24898) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Enable reading of subcolumns (e.g. components of Tuples) for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)).
-* A feature for the MySQL compatibility protocol: make the `user` function return correct output. Closes [#25697](https://github.com/ClickHouse/ClickHouse/pull/25697). [#25697](https://github.com/ClickHouse/ClickHouse/pull/25697) ([sundyli](https://github.com/sundy-li)).
-
-#### Bug Fix
-
-* Improvement for backward compatibility. Use the old modulo function version when it is used in a partition key. Closes [#23508](https://github.com/ClickHouse/ClickHouse/issues/23508). [#24157](https://github.com/ClickHouse/ClickHouse/pull/24157) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix an extremely rare bug on low-memory servers which could lead to the inability to perform merges without a restart. Possibly fixes [#24603](https://github.com/ClickHouse/ClickHouse/issues/24603). [#24872](https://github.com/ClickHouse/ClickHouse/pull/24872) ([alesapin](https://github.com/alesapin)).
-* Fix the extremely rare error `Tagging already tagged part` in the replication queue during concurrent `alter move/replace partition`. Possibly fixes [#22142](https://github.com/ClickHouse/ClickHouse/issues/22142). [#24961](https://github.com/ClickHouse/ClickHouse/pull/24961) ([alesapin](https://github.com/alesapin)).
-* Fix a potential crash when calculating aggregate function states by aggregation of aggregate function states of other aggregate functions (not a practical use case). See [#24523](https://github.com/ClickHouse/ClickHouse/issues/24523). [#25015](https://github.com/ClickHouse/ClickHouse/pull/25015) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed the behavior when the query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` does not finish. This was detected on a server with an extremely low amount of RAM. [#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix a bug which could lead to the ZooKeeper client hanging inside clickhouse-server. [#24721](https://github.com/ClickHouse/ClickHouse/pull/24721) ([alesapin](https://github.com/alesapin)).
-* If the ZooKeeper connection was lost and the replica was cloned after restoring the connection, its replication queue might contain outdated entries. Fixed a failed assertion when the replication queue contains intersecting virtual parts. It may rarely happen if some data part was lost. Print an error in the log instead of terminating. [#24777](https://github.com/ClickHouse/ClickHouse/pull/24777) ([tavplubix](https://github.com/tavplubix)).
-* Fix a lost `WHERE` condition in the expression-push-down optimization of the query plan (setting `query_plan_filter_push_down = 1` by default). Fixes [#25368](https://github.com/ClickHouse/ClickHouse/issues/25368). [#25370](https://github.com/ClickHouse/ClickHouse/pull/25370) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix a bug which could lead to intersecting parts after merges with TTL: `Part all_40_40_0 is covered by all_40_40_1 but should be merged into all_40_41_1. This shouldn't happen often.`. [#25549](https://github.com/ClickHouse/ClickHouse/pull/25549) ([alesapin](https://github.com/alesapin)).
-* On ZooKeeper connection loss, a `ReplicatedMergeTree` table might wait for background operations to complete before trying to reconnect. It's fixed; now background operations are stopped forcefully. [#25306](https://github.com/ClickHouse/ClickHouse/pull/25306) ([tavplubix](https://github.com/tavplubix)).
-* Fix the error `Key expression contains comparison between inconvertible types` for queries with `ARRAY JOIN` in case an array is used in the primary key. Fixes [#8247](https://github.com/ClickHouse/ClickHouse/issues/8247). [#25546](https://github.com/ClickHouse/ClickHouse/pull/25546) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix wrong totals for queries with `WITH TOTALS` and `WITH FILL`. Fixes [#20872](https://github.com/ClickHouse/ClickHouse/issues/20872). [#25539](https://github.com/ClickHouse/ClickHouse/pull/25539) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix a data race when querying `system.clusters` while reloading the cluster configuration at the same time. [#25737](https://github.com/ClickHouse/ClickHouse/pull/25737) ([Amos Bird](https://github.com/amosbird)).
-* Fixed a `No such file or directory` error on moving a `Distributed` table between databases. Fixes [#24971](https://github.com/ClickHouse/ClickHouse/issues/24971). [#25667](https://github.com/ClickHouse/ClickHouse/pull/25667) ([tavplubix](https://github.com/tavplubix)).
-* `REPLACE PARTITION` might be ignored in rare cases if the source partition was empty. It's fixed. Fixes [#24869](https://github.com/ClickHouse/ClickHouse/issues/24869). [#25665](https://github.com/ClickHouse/ClickHouse/pull/25665) ([tavplubix](https://github.com/tavplubix)).
-* Fixed a bug in the `Replicated` database engine that might rarely cause some replica to skip an enqueued DDL query. [#24805](https://github.com/ClickHouse/ClickHouse/pull/24805) ([tavplubix](https://github.com/tavplubix)).
-* Fix a null pointer dereference in `EXPLAIN AST` without a query. [#25631](https://github.com/ClickHouse/ClickHouse/pull/25631) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix waiting for the automatic dropping of empty parts. It could lead to the background pool filling up and replication getting stuck. [#23315](https://github.com/ClickHouse/ClickHouse/pull/23315) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix restore of a table stored in the S3 virtual filesystem (an experimental feature not ready for production). [#25601](https://github.com/ClickHouse/ClickHouse/pull/25601) ([ianton-ru](https://github.com/ianton-ru)).
-* Fix a nullptr dereference in the `Arrow` format when using `Decimal256`. Add `Decimal256` support for the `Arrow` format. [#25531](https://github.com/ClickHouse/ClickHouse/pull/25531) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix an excessive underscore before the names of the preprocessed configuration files. [#25431](https://github.com/ClickHouse/ClickHouse/pull/25431) ([Vitaly Baranov](https://github.com/vitlibar)).
-* A fix for the `clickhouse-copier` tool: fix a segfault when `sharding_key` is absent in the task config. [#25419](https://github.com/ClickHouse/ClickHouse/pull/25419) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix the `REPLACE` column transformer when used in DDL by correctly quoting the formatted query (see the sketch below). This fixes [#23925](https://github.com/ClickHouse/ClickHouse/issues/23925). [#25391](https://github.com/ClickHouse/ClickHouse/pull/25391) ([Amos Bird](https://github.com/amosbird)).
-* Fix the possibility of non-deterministic behaviour of the `quantileDeterministic` function and similar. This closes [#20480](https://github.com/ClickHouse/ClickHouse/issues/20480). [#25313](https://github.com/ClickHouse/ClickHouse/pull/25313) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Support `SimpleAggregateFunction(LowCardinality)` for `SummingMergeTree`. Fixes [#25134](https://github.com/ClickHouse/ClickHouse/issues/25134). [#25300](https://github.com/ClickHouse/ClickHouse/pull/25300) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix a logical error with the exception message "Cannot sum Array/Tuple in min/maxMap". [#25298](https://github.com/ClickHouse/ClickHouse/pull/25298) ([Kruglov Pavel](https://github.com/Avogar)).
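An illustrative sketch of the `REPLACE` column transformer that the quoting fix above concerns (the table and expression are made up):

```sql
CREATE TABLE t (a UInt8, b String) ENGINE = MergeTree ORDER BY a;

-- Returns all columns of t, with column a replaced by the expression a + 1:
SELECT * REPLACE (a + 1 AS a) FROM t;
```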
-* Fix the error `Bad cast from type DB::ColumnLowCardinality to DB::ColumnVector` for queries where a `LowCardinality` argument was used for `IN` (this bug appeared in 21.6). Fixes [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187). [#25290](https://github.com/ClickHouse/ClickHouse/pull/25290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix incorrect behaviour of `joinGetOrNull` with non-nullable columns. This fixes [#24261](https://github.com/ClickHouse/ClickHouse/issues/24261). [#25288](https://github.com/ClickHouse/ClickHouse/pull/25288) ([Amos Bird](https://github.com/amosbird)).
-* Fix incorrect behaviour and a UBSan report in big integers. In previous versions `CAST(1e19 AS UInt128)` returned zero; see the sketch below. [#25279](https://github.com/ClickHouse/ClickHouse/pull/25279) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed an error which occurred while inserting a subset of columns using the CSVWithNames format. Fixes [#25129](https://github.com/ClickHouse/ClickHouse/issues/25129). [#25169](https://github.com/ClickHouse/ClickHouse/pull/25169) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Do not use a table's projection for `SELECT` with `FINAL`. It is not supported yet. [#25163](https://github.com/ClickHouse/ClickHouse/pull/25163) ([Amos Bird](https://github.com/amosbird)).
-* Fix possible loss of parts after updating to 21.5 in case a table used `UUID` in the partition key. (It is not recommended to use `UUID` in a partition key.) Fixes [#25070](https://github.com/ClickHouse/ClickHouse/issues/25070). [#25127](https://github.com/ClickHouse/ClickHouse/pull/25127) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix a crash in queries with a cross join and `joined_subquery_requires_alias = 0`. Fixes [#24011](https://github.com/ClickHouse/ClickHouse/issues/24011). [#25082](https://github.com/ClickHouse/ClickHouse/pull/25082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix a bug with constant maps in the `mapContains` function that led to the error `empty column was returned by function mapContains`. Closes [#25077](https://github.com/ClickHouse/ClickHouse/issues/25077). [#25080](https://github.com/ClickHouse/ClickHouse/pull/25080) ([Kruglov Pavel](https://github.com/Avogar)).
-* Remove the possibility of creating tables with columns referencing themselves, like `a UInt32 ALIAS a + 1` or `b UInt32 MATERIALIZED b`. Fixes [#24910](https://github.com/ClickHouse/ClickHouse/issues/24910), [#24292](https://github.com/ClickHouse/ClickHouse/issues/24292). [#25059](https://github.com/ClickHouse/ClickHouse/pull/25059) ([alesapin](https://github.com/alesapin)).
-* Fix a wrong result when using an aggregate projection with a *non-empty* `GROUP BY` key to execute a query with an *empty* `GROUP BY` key. [#25055](https://github.com/ClickHouse/ClickHouse/pull/25055) ([Amos Bird](https://github.com/amosbird)).
-* Fix serialization of split nested messages in Protobuf format. This PR fixes [#24647](https://github.com/ClickHouse/ClickHouse/issues/24647). [#25000](https://github.com/ClickHouse/ClickHouse/pull/25000) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fix limit/offset settings for distributed queries (they are now ignored on the remote nodes). [#24940](https://github.com/ClickHouse/ClickHouse/pull/24940) ([Azat Khuzhin](https://github.com/azat)).
-* Fix possible heap-buffer-overflow in `Arrow` format. [#24922](https://github.com/ClickHouse/ClickHouse/pull/24922) ([Kruglov Pavel](https://github.com/Avogar)).
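A tiny sketch of the big-integer fix mentioned above:

```sql
-- In versions before the fix this returned 0:
SELECT CAST(1e19 AS UInt128); -- expected: 10000000000000000000
```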
-* Fixed possible error 'Cannot read from istream at offset 0' when reading a file from DiskS3 (S3 virtual filesystem is an experimental feature under development that should not be used in production). [#24885](https://github.com/ClickHouse/ClickHouse/pull/24885) ([Pavel Kovalenko](https://github.com/Jokser)). -* Fix "Missing columns" exception when joining Distributed Materialized View. [#24870](https://github.com/ClickHouse/ClickHouse/pull/24870) ([Azat Khuzhin](https://github.com/azat)). -* Allow `NULL` values in postgresql compatibility protocol. Closes [#22622](https://github.com/ClickHouse/ClickHouse/issues/22622). [#24857](https://github.com/ClickHouse/ClickHouse/pull/24857) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix bug when exception `Mutation was killed` can be thrown to the client on mutation wait when mutation not loaded into memory yet. [#24809](https://github.com/ClickHouse/ClickHouse/pull/24809) ([alesapin](https://github.com/alesapin)). -* Fixed bug in deserialization of random generator state with might cause some data types such as `AggregateFunction(groupArraySample(N), T))` to behave in a non-deterministic way. [#24538](https://github.com/ClickHouse/ClickHouse/pull/24538) ([tavplubix](https://github.com/tavplubix)). -* Disallow building uniqXXXXStates of other aggregation states. [#24523](https://github.com/ClickHouse/ClickHouse/pull/24523) ([Raúl Marín](https://github.com/Algunenano)). Then allow it back by actually eliminating the root cause of the related issue. ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix usage of tuples in `CREATE .. AS SELECT` queries. [#24464](https://github.com/ClickHouse/ClickHouse/pull/24464) ([Anton Popov](https://github.com/CurtizJ)). -* Fix computation of total bytes in `Buffer` table. In current ClickHouse version total_writes.bytes counter decreases too much during the buffer flush. It leads to counter overflow and totalBytes return something around 17.44 EB some time after the flush. [#24450](https://github.com/ClickHouse/ClickHouse/pull/24450) ([DimasKovas](https://github.com/DimasKovas)). -* Fix incorrect information about the monotonicity of toWeek function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422) . This bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/5212 , and was exposed later by smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)). -* When user authentication is managed by LDAP. Fixed potential deadlock that can happen during LDAP role (re)mapping, when LDAP group is mapped to a nonexistent local role. [#24431](https://github.com/ClickHouse/ClickHouse/pull/24431) ([Denis Glazachev](https://github.com/traceon)). -* In "multipart/form-data" message consider the CRLF preceding a boundary as part of it. Fixes [#23905](https://github.com/ClickHouse/ClickHouse/issues/23905). [#24399](https://github.com/ClickHouse/ClickHouse/pull/24399) ([Ivan](https://github.com/abyss7)). -* Fix drop partition with intersect fake parts. In rare cases there might be parts with mutation version greater than current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)). -* Fixed a bug in moving Materialized View from Ordinary to Atomic database (`RENAME TABLE` query). Now inner table is moved to new database together with Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). 
[#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([tavplubix](https://github.com/tavplubix)). -* Allow empty HTTP headers. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)). -* Correct processing of mutations (ALTER UPDATE/DELETE) in Memory tables. Closes [#24274](https://github.com/ClickHouse/ClickHouse/issues/24274). [#24275](https://github.com/ClickHouse/ClickHouse/pull/24275) ([flynn](https://github.com/ucasfl)). -* Make column LowCardinality property in JOIN output the same as in the input, close [#23351](https://github.com/ClickHouse/ClickHouse/issues/23351), close [#20315](https://github.com/ClickHouse/ClickHouse/issues/20315). [#24061](https://github.com/ClickHouse/ClickHouse/pull/24061) ([Vladimir](https://github.com/vdimir)). -* A fix for Kafka tables. Fix the bug in failover behavior when Engine = Kafka was not able to start consumption if the same consumer had an empty assignment previously. Closes [#21118](https://github.com/ClickHouse/ClickHouse/issues/21118). [#21267](https://github.com/ClickHouse/ClickHouse/pull/21267) ([filimonov](https://github.com/filimonov)). - -#### Build/Testing/Packaging Improvement - -* Add `darwin-aarch64` (Mac M1 / Apple Silicon) builds in CI [#25560](https://github.com/ClickHouse/ClickHouse/pull/25560) ([Ivan](https://github.com/abyss7)) and put the links to the docs and website ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Adds cross-platform embedding of binary resources into executables. It works on Illumos. [#25146](https://github.com/ClickHouse/ClickHouse/pull/25146) ([bnaecker](https://github.com/bnaecker)). -* Add join related options to stress tests to improve fuzzing. [#25200](https://github.com/ClickHouse/ClickHouse/pull/25200) ([Vladimir](https://github.com/vdimir)). -* Enable build with s3 module in osx [#25217](https://github.com/ClickHouse/ClickHouse/issues/25217). [#25218](https://github.com/ClickHouse/ClickHouse/pull/25218) ([kevin wan](https://github.com/MaxWk)). -* Add integration test cases to cover JDBC bridge. [#25047](https://github.com/ClickHouse/ClickHouse/pull/25047) ([Zhichun Wu](https://github.com/zhicwu)). -* Integration tests configuration has special treatment for dictionaries. Removed remaining dictionaries manual setup. [#24728](https://github.com/ClickHouse/ClickHouse/pull/24728) ([Ilya Yatsishin](https://github.com/qoega)). -* Add libfuzzer tests for YAMLParser class. [#24480](https://github.com/ClickHouse/ClickHouse/pull/24480) ([BoloniniD](https://github.com/BoloniniD)). -* Ubuntu 20.04 is now used to run integration tests, docker-compose version used to run integration tests is updated to 1.28.2. Environment variables now take effect on docker-compose. Rework test_dictionaries_all_layouts_separate_sources to allow parallel run. [#20393](https://github.com/ClickHouse/ClickHouse/pull/20393) ([Ilya Yatsishin](https://github.com/qoega)). -* Fix TOCTOU error in installation script. [#25277](https://github.com/ClickHouse/ClickHouse/pull/25277) ([alexey-milovidov](https://github.com/alexey-milovidov)). - - -### ClickHouse release 21.6, 2021-06-05 +### ClickHouse release v22.1, 2022-01-18 #### Upgrade Notes -* `zstd` compression library is updated to v1.5.0. You may get messages about "checksum does not match" in replication. These messages are expected due to update of compression algorithm and you can ignore them. 
These messages are informational and do not indicate any kind of undesired behaviour.
-* The setting `compile_expressions` is enabled by default. Although it has been heavily tested on a variety of scenarios, if you find some undesired behaviour on your servers, you can try turning this setting off.
-* Values of `UUID` type cannot be compared with integers. For example, instead of writing `uuid != 0`, write `uuid != '00000000-0000-0000-0000-000000000000'`.
+* The functions `left` and `right` were previously implemented in the parser and are now full-featured. Distributed queries with `left` or `right` functions without aliases may throw an exception if the cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. You can also add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Resource usage by scalar subqueries is fully accounted for since this version. With this change, rows read in scalar subqueries are now reported in the `query_log`. If the scalar subquery is cached (repeated or called for several rows), the rows read are only counted once. This change allows KILLing queries and reporting progress while they are executing scalar subqueries. [#32271](https://github.com/ClickHouse/ClickHouse/pull/32271) ([Raúl Marín](https://github.com/Algunenano)).

#### New Feature

-* Add Postgres-like cast operator (`::`). E.g.: `[1, 2]::Array(UInt8)`, `0.1::Decimal(4, 4)`, `number::UInt16`. [#23871](https://github.com/ClickHouse/ClickHouse/pull/23871) ([Anton Popov](https://github.com/CurtizJ)).
-* Make big integers production ready. Add support for the `UInt128` data type. Fix known issues with the `Decimal256` data type. Support big integers in dictionaries. Support `gcd`/`lcm` functions for big integers. Support big integers in array search and conditional functions. Support `LowCardinality(UUID)`. Support big integers in the `generateRandom` table function and `clickhouse-obfuscator`. Fix error with returning `UUID` from scalar subqueries. This fixes [#7834](https://github.com/ClickHouse/ClickHouse/issues/7834). This fixes [#23936](https://github.com/ClickHouse/ClickHouse/issues/23936). This fixes [#4176](https://github.com/ClickHouse/ClickHouse/issues/4176). This fixes [#24018](https://github.com/ClickHouse/ClickHouse/issues/24018). Backward incompatible change: values of `UUID` type cannot be compared with integers. For example, instead of writing `uuid != 0`, write `uuid != '00000000-0000-0000-0000-000000000000'`. [#23631](https://github.com/ClickHouse/ClickHouse/pull/23631) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Support `Array` data type for inserting and selecting data in `Arrow`, `Parquet` and `ORC` formats. [#21770](https://github.com/ClickHouse/ClickHouse/pull/21770) ([taylor12805](https://github.com/taylor12805)).
-* Implement table comments. Closes [#23225](https://github.com/ClickHouse/ClickHouse/issues/23225). [#23548](https://github.com/ClickHouse/ClickHouse/pull/23548) ([flynn](https://github.com/ucasFL)).
-* Support creating dictionaries with DDL queries in `clickhouse-local`. Closes [#22354](https://github.com/ClickHouse/ClickHouse/issues/22354). Added support for `DETACH DICTIONARY PERMANENTLY`. Added support for `EXCHANGE DICTIONARIES` for the `Atomic` database engine. Added support for moving dictionaries between databases using `RENAME DICTIONARY`. [#23436](https://github.com/ClickHouse/ClickHouse/pull/23436) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add aggregate function `uniqTheta` to support [Theta Sketch](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html) in ClickHouse. [#23894](https://github.com/ClickHouse/ClickHouse/pull/23894). [#22609](https://github.com/ClickHouse/ClickHouse/pull/22609) ([Ping Yu](https://github.com/pingyu)).
-* Add function `splitByRegexp`. [#24077](https://github.com/ClickHouse/ClickHouse/pull/24077) ([abel-cheng](https://github.com/abel-cheng)).
-* Add function `arrayProduct` which accepts an array as the parameter and returns the product of all the elements in the array. Closes [#21613](https://github.com/ClickHouse/ClickHouse/issues/21613). [#23782](https://github.com/ClickHouse/ClickHouse/pull/23782) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add `thread_name` column in `system.stack_trace`. This closes [#23256](https://github.com/ClickHouse/ClickHouse/issues/23256). [#24124](https://github.com/ClickHouse/ClickHouse/pull/24124) ([abel-cheng](https://github.com/abel-cheng)).
-* If `insert_null_as_default` = 1, insert default values instead of NULL in `INSERT ... SELECT` and `INSERT ... SELECT ... UNION ALL ...` queries. Closes [#22832](https://github.com/ClickHouse/ClickHouse/issues/22832). [#23524](https://github.com/ClickHouse/ClickHouse/pull/23524) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add support for progress indication in `clickhouse-local` with the `--progress` option. [#23196](https://github.com/ClickHouse/ClickHouse/pull/23196) ([Egor Savin](https://github.com/Amesaru)).
-* Add support for HTTP compression (determined by the `Content-Encoding` HTTP header) in the `http` dictionary source. This fixes [#8912](https://github.com/ClickHouse/ClickHouse/issues/8912). [#23946](https://github.com/ClickHouse/ClickHouse/pull/23946) ([FArthur-cmd](https://github.com/FArthur-cmd)).
-* Added `SYSTEM QUERY RELOAD MODEL`, `SYSTEM QUERY RELOAD MODELS`. Closes [#18722](https://github.com/ClickHouse/ClickHouse/issues/18722). [#23182](https://github.com/ClickHouse/ClickHouse/pull/23182) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add setting `json` (boolean, 0 by default) for `EXPLAIN PLAN` query. When enabled, query output will be a single `JSON` row. It is recommended to use the `TSVRaw` format to avoid unnecessary escaping. [#23082](https://github.com/ClickHouse/ClickHouse/pull/23082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Add setting `indexes` (boolean, disabled by default) to `EXPLAIN PIPELINE` query. When enabled, shows the used indexes, the number of filtered parts and granules for every index applied. Supported for `MergeTree*` tables. [#22352](https://github.com/ClickHouse/ClickHouse/pull/22352) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* LDAP: implemented user DN detection functionality to use when mapping Active Directory groups to ClickHouse roles. [#22228](https://github.com/ClickHouse/ClickHouse/pull/22228) ([Denis Glazachev](https://github.com/traceon)).
-* New aggregate function `deltaSumTimestamp` for summing the difference between consecutive rows while maintaining ordering during merge by storing timestamps. [#21888](https://github.com/ClickHouse/ClickHouse/pull/21888) ([Russ Frank](https://github.com/rf)).
-* Added a less secure IMDS credentials provider for S3 which works correctly under Docker. [#21852](https://github.com/ClickHouse/ClickHouse/pull/21852) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Add back the `indexHint` function. This is for [#21238](https://github.com/ClickHouse/ClickHouse/issues/21238). This reverts [#9542](https://github.com/ClickHouse/ClickHouse/pull/9542). This fixes [#9540](https://github.com/ClickHouse/ClickHouse/issues/9540). [#21304](https://github.com/ClickHouse/ClickHouse/pull/21304) ([Amos Bird](https://github.com/amosbird)).
+* Implement data schema inference for input formats. Allow skipping the structure (or writing just `auto`) in table functions `file`, `url`, `s3`, `hdfs` and in parameters of `clickhouse-local`. Allow skipping the structure in the create query for table engines `File`, `HDFS`, `S3`, `URL`, `Merge`, `Buffer`, `Distributed` and `ReplicatedMergeTree` (if we add new replicas). [#32455](https://github.com/ClickHouse/ClickHouse/pull/32455) ([Kruglov Pavel](https://github.com/Avogar)).
+* Detect format by file extension in `file`/`hdfs`/`s3`/`url` table functions and `HDFS`/`S3`/`URL` table engines, and also for `SELECT INTO OUTFILE` and `INSERT FROM INFILE`. [#33565](https://github.com/ClickHouse/ClickHouse/pull/33565) ([Kruglov Pavel](https://github.com/Avogar)). Close [#30918](https://github.com/ClickHouse/ClickHouse/issues/30918). [#33443](https://github.com/ClickHouse/ClickHouse/pull/33443) ([OnePiece](https://github.com/zhongyuankai)).
+* Add a tool for collecting diagnostics data, for cases when you need support. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)).
+* Automatic cluster discovery via Zoo/Keeper. It allows adding replicas to the cluster without changing the configuration on every server. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([vdimir](https://github.com/vdimir)).
+* Implement the Hive table engine to access Apache Hive from ClickHouse. This implements [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). [#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([taiyang-li](https://github.com/taiyang-li)).
+* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorical values. All these functions use a cross-tab (histogram on pairs) for their implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessarily numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([alexey-milovidov](https://github.com/alexey-milovidov)). Initial implementation by TODO
+* Added table function `hdfsCluster` which allows processing files from HDFS in parallel from many nodes in a specified cluster, similarly to `s3Cluster`. [#32400](https://github.com/ClickHouse/ClickHouse/pull/32400) ([Zhichang Yu](https://github.com/yuzhichang)).
+* Add support for disks backed by Azure Blob Storage, in a similar way to what has been done for disks backed by AWS S3. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)).
+* Allow `COMMENT` in `CREATE VIEW` (for all VIEW kinds). [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)).
+* Dynamically reinitialize listening ports and protocols when the configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Added `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix an error in the implementation of the `substringUTF8` function with negative offset (offset from the end of the string). [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add new functions for the `H3` coordinate system: `h3HexAreaKm2`, `h3CellAreaM2`, `h3CellAreaRads2`. [#33479](https://github.com/ClickHouse/ClickHouse/pull/33479) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add `MONTHNAME` function. [#33436](https://github.com/ClickHouse/ClickHouse/pull/33436) ([usurai](https://github.com/usurai)).
+* Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) Added function `arrayLastIndex`. [#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add function `decodeURLFormComponent`, slightly different from `decodeURLComponent`. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)).
+* Allow splitting `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional `rule_type` field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)).
+* Potential issue, cannot be exploited: an integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)).
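For illustration, a minimal sketch of the Postgres-like cast operator and the `UUID` comparison rule described above (the table `t` with a `uuid` column is hypothetical):

```sql
-- Postgres-like cast operator:
SELECT [1, 2]::Array(UInt8), 0.1::Decimal(4, 4), 42::UInt16;

-- UUID values can no longer be compared with integers;
-- compare against the zero UUID literal instead:
SELECT count() FROM t WHERE uuid != '00000000-0000-0000-0000-000000000000';
```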
-#### Experimental Feature
-
-* Add `PROJECTION` support for `MergeTree*` tables. [#20202](https://github.com/ClickHouse/ClickHouse/pull/20202) ([Amos Bird](https://github.com/amosbird)).

#### Performance Improvement

-* Enable the `compile_expressions` setting by default. When this setting is enabled, compositions of simple functions and operators will be compiled to native code with LLVM at runtime. [#8482](https://github.com/ClickHouse/ClickHouse/pull/8482) ([Maksim Kita](https://github.com/kitaisreal), [alexey-milovidov](https://github.com/alexey-milovidov)). Note: if you run into trouble, turn this option off.
-* Update the `re2` library. Performance of regular expression matching is improved. Also this PR adds compatibility with gcc-11. [#24196](https://github.com/ClickHouse/ClickHouse/pull/24196) ([Raúl Marín](https://github.com/Algunenano)).
-* ORC input format now reads by stripe instead of reading the entire table into memory at once, which is costly in memory when the file size is huge. [#23102](https://github.com/ClickHouse/ClickHouse/pull/23102) ([Chao Ma](https://github.com/godliness)).
-* Fusion of the aggregate functions `sum`, `count` and `avg` in a query into a single aggregate function. The optimization is controlled with the `optimize_fuse_sum_count_avg` setting. This is implemented with a new aggregate function `sumCount`. This function returns a tuple of two fields: `sum` and `count`. [#21337](https://github.com/ClickHouse/ClickHouse/pull/21337) ([hexiaoting](https://github.com/hexiaoting)).
-* Update `zstd` to v1.5.0. The performance of compression is improved by a single-digit percentage. [#24135](https://github.com/ClickHouse/ClickHouse/pull/24135) ([Raúl Marín](https://github.com/Algunenano)). Note: you may get messages about "checksum does not match" in replication. These messages are expected due to the update of the compression algorithm and you can ignore them.
-* Improved performance of `Buffer` tables: do not acquire a lock for `total_bytes`/`total_rows` for the `Buffer` engine. [#24066](https://github.com/ClickHouse/ClickHouse/pull/24066) ([Azat Khuzhin](https://github.com/azat)).
-* Preallocation support for `hashed`/`sparse_hashed` dictionaries is returned. [#23979](https://github.com/ClickHouse/ClickHouse/pull/23979) ([Azat Khuzhin](https://github.com/azat)).
-* Enable `async_socket_for_remote` by default (lowers the number of threads used when querying Distributed tables with large fanout). [#23683](https://github.com/ClickHouse/ClickHouse/pull/23683) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
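As a sketch of the `sum`/`count`/`avg` fusion noted above (the table `t` and column `x` are hypothetical):

```sql
SET optimize_fuse_sum_count_avg = 1;

-- The three aggregates below can be computed through a single sumCount(x) state:
SELECT sum(x), count(x), avg(x) FROM t;
```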
-
-#### Improvement
-
-* Add `_partition_value` virtual column to the MergeTree table family. It can be used to prune partitions in a deterministic way. It's needed to implement a partition matcher for mutations. [#23673](https://github.com/ClickHouse/ClickHouse/pull/23673) ([Amos Bird](https://github.com/amosbird)).
-* Added `region` parameter for S3 storage and disk. [#23846](https://github.com/ClickHouse/ClickHouse/pull/23846) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Allow configuring different log levels for different logging channels. Closes [#19569](https://github.com/ClickHouse/ClickHouse/issues/19569). [#23857](https://github.com/ClickHouse/ClickHouse/pull/23857) ([filimonov](https://github.com/filimonov)).
-* Keep the default timezone on `DateTime` operations if it was not provided explicitly. For example, if you add one second to a value of `DateTime` type without timezone it will remain `DateTime` without timezone. In previous versions the value of the default timezone was placed into the returned data type explicitly, so it became `DateTime('something')`. This closes [#4854](https://github.com/ClickHouse/ClickHouse/issues/4854). [#23392](https://github.com/ClickHouse/ClickHouse/pull/23392) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Allow the user to specify an empty string instead of a database name for `MySQL` storage. The default database will be used for queries. In previous versions it was working for SELECT queries, and now support for INSERT was also added. This closes [#19281](https://github.com/ClickHouse/ClickHouse/issues/19281). This can be useful when working with `Sphinx` or other MySQL-compatible foreign databases. [#23319](https://github.com/ClickHouse/ClickHouse/pull/23319) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in the implementation of an earlier version of the algorithm was fixed. [#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Function `now64` now supports an optional timezone argument. [#24091](https://github.com/ClickHouse/ClickHouse/pull/24091) ([Vasily Nemkov](https://github.com/Enmk)).
-* Fix the case when a progress bar in interactive mode in clickhouse-client that appears in the middle of the data may rewrite some parts of visible data in the terminal. This closes [#19283](https://github.com/ClickHouse/ClickHouse/issues/19283). [#23050](https://github.com/ClickHouse/ClickHouse/pull/23050) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix crash when memory allocation fails in simdjson. https://github.com/simdjson/simdjson/pull/1567. Marked as improvement because it's a very rare bug. [#24147](https://github.com/ClickHouse/ClickHouse/pull/24147) ([Amos Bird](https://github.com/amosbird)).
-* Preserve dictionaries until storage shutdown (this will avoid possible `external dictionary 'DICT' not found` errors at server shutdown during the final flush of the `Buffer` engine). [#24068](https://github.com/ClickHouse/ClickHouse/pull/24068) ([Azat Khuzhin](https://github.com/azat)).
-* Flush `Buffer` tables before shutting down tables (within one database), to avoid discarding blocks due to the underlying table having already been detached (and a `Destination table default.a_data_01870 doesn't exist. Block of data is discarded` error in the log). [#24067](https://github.com/ClickHouse/ClickHouse/pull/24067) ([Azat Khuzhin](https://github.com/azat)).
-* Now `prefer_column_name_to_alias = 1` will also favor column names for `group by`, `having` and `order by`. This fixes [#23882](https://github.com/ClickHouse/ClickHouse/issues/23882). [#24022](https://github.com/ClickHouse/ClickHouse/pull/24022) ([Amos Bird](https://github.com/amosbird)).
-* Add support for `ORDER BY WITH FILL` with `DateTime64`. [#24016](https://github.com/ClickHouse/ClickHouse/pull/24016) ([kevin wan](https://github.com/MaxWk)).
-* Enable `DateTime64` to be a version column in `ReplacingMergeTree`. [#23992](https://github.com/ClickHouse/ClickHouse/pull/23992) ([kevin wan](https://github.com/MaxWk)).
-* Log information about OS name, kernel version and CPU architecture on server startup. [#23988](https://github.com/ClickHouse/ClickHouse/pull/23988) ([Azat Khuzhin](https://github.com/azat)).
-* Support specifying a table schema for the `postgresql` dictionary source. Closes [#23958](https://github.com/ClickHouse/ClickHouse/issues/23958). [#23980](https://github.com/ClickHouse/ClickHouse/pull/23980) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add hints for names of `Enum` elements (suggest names in case of typos). Closes [#17112](https://github.com/ClickHouse/ClickHouse/issues/17112). [#23919](https://github.com/ClickHouse/ClickHouse/pull/23919) ([flynn](https://github.com/ucasFL)).
-* Measure found rate (the percentage for which the value was found) for dictionaries (see `found_rate` in `system.dictionaries`). [#23916](https://github.com/ClickHouse/ClickHouse/pull/23916) ([Azat Khuzhin](https://github.com/azat)).
-* Allow adding specific queue settings via the table setting `rabbitmq_queue_settings_list`. (Closes [#23737](https://github.com/ClickHouse/ClickHouse/issues/23737) and [#23918](https://github.com/ClickHouse/ClickHouse/issues/23918)). Allow the user to control all RabbitMQ setup: if the table setting `rabbitmq_queue_consume` is set to `1`, the RabbitMQ table engine will only connect to the specified queue and will not perform any RabbitMQ consumer-side setup like declaring exchanges, queues or bindings. (Closes [#21757](https://github.com/ClickHouse/ClickHouse/issues/21757)). Add proper cleanup when a RabbitMQ table is dropped: delete the queues which the table has declared, and all bound exchanges if they were created by the table. [#23887](https://github.com/ClickHouse/ClickHouse/pull/23887) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add `broken_data_files`/`broken_data_compressed_bytes` into `system.distribution_queue`. Add a metric for the number of files for asynchronous insertion into Distributed tables that have been marked as broken (`BrokenDistributedFilesToInsert`). [#23885](https://github.com/ClickHouse/ClickHouse/pull/23885) ([Azat Khuzhin](https://github.com/azat)).
-* Querying `system.tables` does not go to ZooKeeper anymore. [#23793](https://github.com/ClickHouse/ClickHouse/pull/23793) ([Fuwang Hu](https://github.com/fuwhu)).
-* Respect `lock_acquire_timeout_for_background_operations` for `OPTIMIZE` queries. [#23623](https://github.com/ClickHouse/ClickHouse/pull/23623) ([Azat Khuzhin](https://github.com/azat)).
-* Possibility to change `S3` disk settings at runtime via the new `SYSTEM RESTART DISK` SQL command. [#23429](https://github.com/ClickHouse/ClickHouse/pull/23429) ([Pavel Kovalenko](https://github.com/Jokser)).
-* If a user applied a misconfiguration by mistakenly setting `max_distributed_connections` to the value zero, every query to a `Distributed` table would throw an exception with a message containing "logical error". But it's really an expected behaviour, not a logical error, so the exception message was slightly incorrect. It also triggered checks in our CI environment that ensure that no logical errors ever happen. Instead we will treat `max_distributed_connections` misconfigured to zero as the minimum possible value (one). [#23348](https://github.com/ClickHouse/ClickHouse/pull/23348) ([Azat Khuzhin](https://github.com/azat)).
-* Disable `min_bytes_to_use_mmap_io` by default. [#23322](https://github.com/ClickHouse/ClickHouse/pull/23322) ([Azat Khuzhin](https://github.com/azat)).
-* Support `LowCardinality` nullability with `join_use_nulls`, close [#15101](https://github.com/ClickHouse/ClickHouse/issues/15101). [#23237](https://github.com/ClickHouse/ClickHouse/pull/23237) ([vdimir](https://github.com/vdimir)).
-* Added the possibility to restore `MergeTree` parts to the `detached` directory for the `S3` disk. [#23112](https://github.com/ClickHouse/ClickHouse/pull/23112) ([Pavel Kovalenko](https://github.com/Jokser)).
-* Retries on HTTP connection drops in S3. [#22988](https://github.com/ClickHouse/ClickHouse/pull/22988) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Add settings `external_storage_max_read_rows` and `external_storage_max_read_bytes` for the MySQL table engine, dictionary source and MaterializeMySQL minor data fetches. [#22697](https://github.com/ClickHouse/ClickHouse/pull/22697) ([TCeason](https://github.com/TCeason)).
-* `MaterializeMySQL` (experimental feature): Previously, MySQL 5.7.9 was not supported due to SQL incompatibility. Now leave MySQL parameter verification to MaterializeMySQL. [#23413](https://github.com/ClickHouse/ClickHouse/pull/23413) ([TCeason](https://github.com/TCeason)).
-* Enable reading of subcolumns for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix usage of tuples in `CREATE .. AS SELECT` queries. [#24464](https://github.com/ClickHouse/ClickHouse/pull/24464) ([Anton Popov](https://github.com/CurtizJ)).
-* Support for `Parquet` format in `Kafka` tables. [#23412](https://github.com/ClickHouse/ClickHouse/pull/23412) ([Chao Ma](https://github.com/godliness)).
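A minimal sketch of `ORDER BY ... WITH FILL` over a `DateTime64` column as now supported above; the values are illustrative, and a numeric `STEP` is assumed to be interpreted in seconds:

```sql
SELECT toDateTime64('2021-05-20 00:00:00', 3) + number * 2 AS ts, number AS value
FROM numbers(3)
ORDER BY ts WITH FILL STEP 1;  -- gap rows are generated with default values
```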
-
-#### Bug Fix
-
-* Use the old modulo function version when used in partition key and primary key. Closes [#23508](https://github.com/ClickHouse/ClickHouse/issues/23508). [#24157](https://github.com/ClickHouse/ClickHouse/pull/24157) ([Kseniia Sumarokova](https://github.com/kssenii)). It was a source of backward incompatibility in previous releases.
-* Fixed the behavior when the query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` is being processed infinitely. This was detected on a server with an extremely small amount of RAM. [#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix incorrect monotonicity of the `toWeek` function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422). This bug was introduced in [#5212](https://github.com/ClickHouse/ClickHouse/pull/5212), and was exposed later by a smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)).
-* Fix drop partition with intersecting fake parts. In rare cases there might be parts with a mutation version greater than the current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)).
-* Fixed a bug in moving a Materialized View from an Ordinary to an Atomic database (`RENAME TABLE` query). Now the inner table is moved to the new database together with the Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([tavplubix](https://github.com/tavplubix)).
-* Allow empty HTTP headers in client requests. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)).
-* Set `max_threads = 1` to fix mutation failures of `Memory` tables. Closes [#24274](https://github.com/ClickHouse/ClickHouse/issues/24274). [#24275](https://github.com/ClickHouse/ClickHouse/pull/24275) ([flynn](https://github.com/ucasFL)).
-* Fix a typo in the implementation of `Memory` tables; this bug was introduced in [#15127](https://github.com/ClickHouse/ClickHouse/issues/15127). Closes [#24192](https://github.com/ClickHouse/ClickHouse/issues/24192). [#24193](https://github.com/ClickHouse/ClickHouse/pull/24193) ([张中南](https://github.com/plugine)).
-* Fix abnormal server termination due to `HDFS` becoming inaccessible during query execution. Closes [#24117](https://github.com/ClickHouse/ClickHouse/issues/24117). [#24191](https://github.com/ClickHouse/ClickHouse/pull/24191) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix crash on update of a `Nested` column with const condition. [#24183](https://github.com/ClickHouse/ClickHouse/pull/24183) ([hexiaoting](https://github.com/hexiaoting)).
-* Fix a race condition which could happen in RBAC under a heavy load. This PR fixes [#24090](https://github.com/ClickHouse/ClickHouse/issues/24090), [#24134](https://github.com/ClickHouse/ClickHouse/issues/24134). [#24176](https://github.com/ClickHouse/ClickHouse/pull/24176) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fix a rare bug that could lead to a partially initialized table that can serve write requests (insert/alter/and so on). Now such tables will be in readonly mode. [#24122](https://github.com/ClickHouse/ClickHouse/pull/24122) ([alesapin](https://github.com/alesapin)).
-* Fix an issue: `EXPLAIN PIPELINE` with `SELECT xxx FINAL` showed a wrong pipeline. ([hexiaoting](https://github.com/hexiaoting)).
-* Fixed using a const `DateTime` value vs a `DateTime64` column in `WHERE`. [#24100](https://github.com/ClickHouse/ClickHouse/pull/24100) ([Vasily Nemkov](https://github.com/Enmk)).
-* Fix crash in merge JOIN, closes [#24010](https://github.com/ClickHouse/ClickHouse/issues/24010). [#24013](https://github.com/ClickHouse/ClickHouse/pull/24013) ([vdimir](https://github.com/vdimir)).
-* Some `ALTER PARTITION` queries might cause `Part A intersects previous part B` and `Unexpected merged part C intersecting drop range D` errors in the replication queue. It's fixed. Fixes [#23296](https://github.com/ClickHouse/ClickHouse/issues/23296). [#23997](https://github.com/ClickHouse/ClickHouse/pull/23997) ([tavplubix](https://github.com/tavplubix)).
-* Fix SIGSEGV for external GROUP BY and overflow row (i.e. queries like `SELECT FROM GROUP BY WITH TOTALS SETTINGS max_bytes_before_external_group_by>0, max_rows_to_group_by>0, group_by_overflow_mode='any', totals_mode='before_having'`). [#23962](https://github.com/ClickHouse/ClickHouse/pull/23962) ([Azat Khuzhin](https://github.com/azat)).
-* Fix keys metrics accounting for the `CACHE` dictionary with duplicates in the source (leading to `DictCacheKeysRequestedMiss` overflows). [#23929](https://github.com/ClickHouse/ClickHouse/pull/23929) ([Azat Khuzhin](https://github.com/azat)).
-* Fix the implementation of the connection pool of the `PostgreSQL` engine. Closes [#23897](https://github.com/ClickHouse/ClickHouse/issues/23897). [#23909](https://github.com/ClickHouse/ClickHouse/pull/23909) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix `distributed_group_by_no_merge = 2` with `GROUP BY` and an aggregate function wrapped into a regular function (had been broken in [#23546](https://github.com/ClickHouse/ClickHouse/issues/23546)). Throw an exception in case someone tries to use `distributed_group_by_no_merge = 2` with window functions. Disable `optimize_distributed_group_by_sharding_key` for queries with window functions. [#23906](https://github.com/ClickHouse/ClickHouse/pull/23906) ([Azat Khuzhin](https://github.com/azat)).
-* A fix for the `s3` table function: better handling of HTTP errors. Response bodies of HTTP errors were being ignored earlier. [#23844](https://github.com/ClickHouse/ClickHouse/pull/23844) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* A fix for the `s3` table function: better handling of URIs. Fixed an incompatibility with URLs containing the `+` symbol; data with such keys could not be read previously. [#23822](https://github.com/ClickHouse/ClickHouse/pull/23822) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Fix error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` and `use_hedged_requests`. Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix `CLEAR COLUMN` not working when the column is referenced by a materialized view. Close [#23764](https://github.com/ClickHouse/ClickHouse/issues/23764). [#23781](https://github.com/ClickHouse/ClickHouse/pull/23781) ([flynn](https://github.com/ucasFL)).
-* Fix heap use-after-free when reading from HDFS if the `Values` format is used. [#23761](https://github.com/ClickHouse/ClickHouse/pull/23761) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Avoid a possible "Cannot schedule a task" error (in case some exception has occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed a bug in recovery of a stale `ReplicatedMergeTree` replica. Some metadata updates could be ignored by a stale replica if an `ALTER` query was executed during the downtime of the replica. [#23742](https://github.com/ClickHouse/ClickHouse/pull/23742) ([tavplubix](https://github.com/tavplubix)).
-* Fix a bug with `Join` and `WITH TOTALS`, close [#17718](https://github.com/ClickHouse/ClickHouse/issues/17718). [#23549](https://github.com/ClickHouse/ClickHouse/pull/23549) ([vdimir](https://github.com/vdimir)).
-* Fix possible `Block structure mismatch` error for queries with `UNION` which could happen after the filter-pushdown optimization. Fixes [#23029](https://github.com/ClickHouse/ClickHouse/issues/23029). [#23359](https://github.com/ClickHouse/ClickHouse/pull/23359) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Add type conversion when the setting `optimize_skip_unused_shards_rewrite_in` is enabled. This fixes an MSan report. [#23219](https://github.com/ClickHouse/ClickHouse/pull/23219) ([Azat Khuzhin](https://github.com/azat)).
-* Add a missing check when updating nested subcolumns, close issue: [#22353](https://github.com/ClickHouse/ClickHouse/issues/22353). [#22503](https://github.com/ClickHouse/ClickHouse/pull/22503) ([hexiaoting](https://github.com/hexiaoting)).
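As a sketch of the `s3` table function whose URI and HTTP-error handling is fixed above (the bucket, key and schema are placeholders):

```sql
-- Keys containing '+' could not be read before the fix:
SELECT count()
FROM s3('https://my-bucket.s3.amazonaws.com/data/key+with+plus.csv', 'CSV', 'a UInt32, b String');
```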
-
-#### Build/Testing/Packaging Improvement
-
-* Support building on Illumos. [#24144](https://github.com/ClickHouse/ClickHouse/pull/24144). Adds support for building on Solaris-derived operating systems. [#23746](https://github.com/ClickHouse/ClickHouse/pull/23746) ([bnaecker](https://github.com/bnaecker)).
-* Add more benchmarks for hash tables, including the Swiss Table from Google (which appeared to be slower than the ClickHouse hash map in our specific usage scenario). [#24111](https://github.com/ClickHouse/ClickHouse/pull/24111) ([Maksim Kita](https://github.com/kitaisreal)).
-* Update librdkafka 1.6.0-RC3 to 1.6.1. [#23874](https://github.com/ClickHouse/ClickHouse/pull/23874) ([filimonov](https://github.com/filimonov)).
-* Always enable `asynchronous-unwind-tables` explicitly. It may fix the query profiler on AArch64. [#23602](https://github.com/ClickHouse/ClickHouse/pull/23602) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Avoid a possible build dependency on locale and filesystem order. This allows reproducible builds. [#23600](https://github.com/ClickHouse/ClickHouse/pull/23600) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Remove a source of nondeterminism from the build. Now builds at different points in time will produce byte-identical binaries. Partially addressed [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#23559](https://github.com/ClickHouse/ClickHouse/pull/23559) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add a simple tool for benchmarking (Zoo)Keeper. [#23038](https://github.com/ClickHouse/ClickHouse/pull/23038) ([alesapin](https://github.com/alesapin)).
-
-
-## ClickHouse release 21.5, 2021-05-20
-
-#### Backward Incompatible Change
-
-* Change comparison of integers and floating point numbers when the integer is not exactly representable in the floating point data type. In the new version the comparison will return false as a rounding error occurs. Example: `9223372036854775808.0 != 9223372036854775808`, because the number `9223372036854775808` is not representable as a floating point number exactly (and `9223372036854775808.0` is rounded to `9223372036854776000.0`). But in the previous version the comparison would return true, as the numbers were considered equal: if the floating point number `9223372036854776000.0` gets converted back to UInt64, it yields `9223372036854775808`. For reference, the Python programming language also treats these numbers as equal. But this behaviour was dependent on the CPU model (different results on AMD64 and AArch64 for some out-of-range numbers), so we make the comparison more precise. It will treat int and float numbers as equal only if the int is represented in the floating point type exactly. [#22595](https://github.com/ClickHouse/ClickHouse/pull/22595) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Remove support for `argMin` and `argMax` for a single `Tuple` argument. The code was not memory-safe. The feature was added by mistake and it is confusing for people. These functions can be reintroduced under different names later. This fixes [#22384](https://github.com/ClickHouse/ClickHouse/issues/22384) and reverts [#17359](https://github.com/ClickHouse/ClickHouse/issues/17359). [#23393](https://github.com/ClickHouse/ClickHouse/pull/23393) ([alexey-milovidov](https://github.com/alexey-milovidov)).
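A worked example of the comparison change described above, taken directly from the entry:

```sql
-- 9223372036854775808 is not exactly representable as Float64,
-- so the comparison is now false (older versions considered the values equal):
SELECT 9223372036854775808.0 = 9223372036854775808;
```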
-
-#### New Feature
-
-* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. A zero `level` value is equivalent to infinity. Improved performance of the `dictGetHierarchy`, `dictIsIn` functions. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added function `dictGetOrNull`. It works like `dictGet`, but returns `Null` in case the key was not found in the dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added a table function `s3Cluster`, which allows processing files from `s3` in parallel on every node of a specified cluster. [#22012](https://github.com/ClickHouse/ClickHouse/pull/22012) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Added support for replicas and shards in the MySQL/PostgreSQL table engine / table function. You can write `SELECT * FROM mysql('host{1,2}-{1|2}', ...)`. Closes [#20969](https://github.com/ClickHouse/ClickHouse/issues/20969). [#22217](https://github.com/ClickHouse/ClickHouse/pull/22217) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Added `ALTER TABLE ... FETCH PART ...` query. It's similar to `FETCH PARTITION`, but fetches only one part. [#22706](https://github.com/ClickHouse/ClickHouse/pull/22706) ([turbo jason](https://github.com/songenjie)).
-* Added a setting `max_distributed_depth` that limits the depth of recursive queries to `Distributed` tables. Closes [#20229](https://github.com/ClickHouse/ClickHouse/issues/20229). [#21942](https://github.com/ClickHouse/ClickHouse/pull/21942) ([flynn](https://github.com/ucasFL)).
-
-#### Performance Improvement
-
-* Improved performance of `intDiv` by dynamic dispatch for AVX2. This closes [#22314](https://github.com/ClickHouse/ClickHouse/issues/22314). [#23000](https://github.com/ClickHouse/ClickHouse/pull/23000) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improved performance of reading from the `ArrowStream` input format for sources other than local files (e.g. URL). [#22673](https://github.com/ClickHouse/ClickHouse/pull/22673) ([nvartolomei](https://github.com/nvartolomei)).
-* Disabled compression by default when interacting with localhost (with clickhouse-client or server to server with distributed queries) via the native protocol. It may improve performance of some import/export operations. This closes [#22234](https://github.com/ClickHouse/ClickHouse/issues/22234). [#22237](https://github.com/ClickHouse/ClickHouse/pull/22237) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Exclude values that do not belong to the shard from the right part of the IN section for distributed queries (under `optimize_skip_unused_shards_rewrite_in`, enabled by default, since it still requires `optimize_skip_unused_shards`). [#21511](https://github.com/ClickHouse/ClickHouse/pull/21511) ([Azat Khuzhin](https://github.com/azat)).
-* Improved performance of reading a subset of columns with a File-like table engine and column-oriented formats like Parquet, Arrow or ORC. This closes [#20129](https://github.com/ClickHouse/ClickHouse/issues/20129). [#21302](https://github.com/ClickHouse/ClickHouse/pull/21302) ([keenwolf](https://github.com/keen-wolf)).
-* Allow moving more conditions to `PREWHERE` as it was before version 21.1 (adjustment of internal heuristics). An insufficient number of moved conditions could lead to worse performance. [#23397](https://github.com/ClickHouse/ClickHouse/pull/23397) ([Anton Popov](https://github.com/CurtizJ)).
-* Improved performance of ODBC connections and fixed all the outstanding issues from the backlog. Using the `nanodbc` library instead of `Poco::ODBC`. Closes [#9678](https://github.com/ClickHouse/ClickHouse/issues/9678). Add support for DateTime64 and Decimal* for the ODBC table engine. Closes [#21961](https://github.com/ClickHouse/ClickHouse/issues/21961). Fixed an issue with Cyrillic text being truncated. Closes [#16246](https://github.com/ClickHouse/ClickHouse/issues/16246). Added connection pools for the ODBC bridge. [#21972](https://github.com/ClickHouse/ClickHouse/pull/21972) ([Kseniia Sumarokova](https://github.com/kssenii)).
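A minimal sketch of the IN-rewrite optimization above (the `Distributed` table `dist` and its sharding key are hypothetical):

```sql
SET optimize_skip_unused_shards = 1, optimize_skip_unused_shards_rewrite_in = 1;

-- Each shard receives only the IN values that can actually live on it:
SELECT count() FROM dist WHERE sharding_key IN (1, 2, 3);
```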
-
-#### Improvement
-
-* Increase `max_uri_size` (the maximum size of a URL in the HTTP interface) to 1 MiB by default. This closes [#21197](https://github.com/ClickHouse/ClickHouse/issues/21197). [#22997](https://github.com/ClickHouse/ClickHouse/pull/22997) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Set `background_fetches_pool_size` to `8`, which is better for production usage with frequent small insertions or a slow ZooKeeper cluster. [#22945](https://github.com/ClickHouse/ClickHouse/pull/22945) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* FlatDictionary: added `initial_array_size`, `max_array_size` options. [#22521](https://github.com/ClickHouse/ClickHouse/pull/22521) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add new setting `non_replicated_deduplication_window` for deduplication of inserts into non-replicated MergeTree tables. [#22514](https://github.com/ClickHouse/ClickHouse/pull/22514) ([alesapin](https://github.com/alesapin)).
-* Update paths to the `CatBoost` model configs in config reloading. [#22434](https://github.com/ClickHouse/ClickHouse/pull/22434) ([Kruglov Pavel](https://github.com/Avogar)).
-* Added `Decimal256` type support in dictionaries. `Decimal256` is an experimental feature. Closes [#20979](https://github.com/ClickHouse/ClickHouse/issues/20979). [#22960](https://github.com/ClickHouse/ClickHouse/pull/22960) ([Maksim Kita](https://github.com/kitaisreal)).
-* Enabled `async_socket_for_remote` by default (using fewer OS threads for distributed queries). [#23683](https://github.com/ClickHouse/ClickHouse/pull/23683) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in the implementation of an earlier version of the algorithm was fixed. [#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Make the function name `unhex` case-insensitive for compatibility with MySQL. [#23229](https://github.com/ClickHouse/ClickHouse/pull/23229) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Implement functions `arrayHasAny`, `arrayHasAll`, `has`, `indexOf`, `countEqual` for the generic case when the types of array elements are different. In previous versions the functions `arrayHasAny`, `arrayHasAll` returned false, and `has`, `indexOf`, `countEqual` threw an exception. Also add support for `Decimal` and big integer types in the function `has` and similar ones. This closes [#20272](https://github.com/ClickHouse/ClickHouse/issues/20272). [#23044](https://github.com/ClickHouse/ClickHouse/pull/23044) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Raised the threshold on the max number of matches in the result of the function `extractAllGroupsHorizontal`. [#23036](https://github.com/ClickHouse/ClickHouse/pull/23036) ([Vasily Nemkov](https://github.com/Enmk)).
-* Do not perform `optimize_skip_unused_shards` for a cluster with one node. [#22999](https://github.com/ClickHouse/ClickHouse/pull/22999) ([Azat Khuzhin](https://github.com/azat)).
-* Added the ability to run clickhouse-keeper (an experimental drop-in replacement for ZooKeeper) with SSL. The config setting `keeper_server.tcp_port_secure` can be used for secure interaction between client and keeper-server. `keeper_server.raft_configuration.secure` can be used to enable internal secure communication between nodes. [#22992](https://github.com/ClickHouse/ClickHouse/pull/22992) ([alesapin](https://github.com/alesapin)).
-* Added the ability to flush the buffer only in background for `Buffer` tables. [#22986](https://github.com/ClickHouse/ClickHouse/pull/22986) ([Azat Khuzhin](https://github.com/azat)).
-* When selecting from a MergeTree table with NULL in the WHERE condition, in rare cases an exception was thrown. This closes [#20019](https://github.com/ClickHouse/ClickHouse/issues/20019). [#22978](https://github.com/ClickHouse/ClickHouse/pull/22978) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix error handling in the Poco HTTP Client for AWS. [#22973](https://github.com/ClickHouse/ClickHouse/pull/22973) ([kreuzerkrieg](https://github.com/kreuzerkrieg)).
-* Respect `max_part_removal_threads` for `ReplicatedMergeTree`. [#22971](https://github.com/ClickHouse/ClickHouse/pull/22971) ([Azat Khuzhin](https://github.com/azat)).
-* Fix an obscure corner case of MergeTree settings `inactive_parts_to_throw_insert = 0` with `inactive_parts_to_delay_insert > 0`. [#22947](https://github.com/ClickHouse/ClickHouse/pull/22947) ([Azat Khuzhin](https://github.com/azat)).
-* `dateDiff` now works with `DateTime64` arguments (even for values outside the `DateTime` range). [#22931](https://github.com/ClickHouse/ClickHouse/pull/22931) ([Vasily Nemkov](https://github.com/Enmk)).
-* MaterializeMySQL (experimental feature): added the ability to replicate MySQL databases containing views without failing. This is accomplished by ignoring the views. [#22760](https://github.com/ClickHouse/ClickHouse/pull/22760) ([Christian](https://github.com/cfroystad)).
-* Allow RBAC row policy via the PostgreSQL protocol. Closes [#22658](https://github.com/ClickHouse/ClickHouse/issues/22658). The PostgreSQL protocol is enabled in the configuration by default. [#22755](https://github.com/ClickHouse/ClickHouse/pull/22755) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add a metric to track how much time is spent waiting for the Buffer layer lock. [#22725](https://github.com/ClickHouse/ClickHouse/pull/22725) ([Azat Khuzhin](https://github.com/azat)).
-* Allow using CTEs in VIEW definitions. This closes [#22491](https://github.com/ClickHouse/ClickHouse/issues/22491). [#22657](https://github.com/ClickHouse/ClickHouse/pull/22657) ([Amos Bird](https://github.com/amosbird)).
-* Clear the rest of the screen and show the cursor in `clickhouse-client` if a previous program has left garbage in the terminal. This closes [#16518](https://github.com/ClickHouse/ClickHouse/issues/16518). [#22634](https://github.com/ClickHouse/ClickHouse/pull/22634) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Make the `round` function behave consistently on non-x86_64 platforms. Rounding half to nearest even (banker's rounding) is used. [#22582](https://github.com/ClickHouse/ClickHouse/pull/22582) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Correctly check the structure of blocks of data that are sent by Distributed tables. [#22325](https://github.com/ClickHouse/ClickHouse/pull/22325) ([Azat Khuzhin](https://github.com/azat)).
-* Allow publishing Kafka errors to a virtual column of the Kafka engine, controlled by the `kafka_handle_error_mode` setting. [#21850](https://github.com/ClickHouse/ClickHouse/pull/21850) ([fastio](https://github.com/fastio)).
-* Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes [#21383](https://github.com/ClickHouse/ClickHouse/issues/21383). [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)).
-* Add `clickhouse-library-bridge` for the library dictionary source. Closes [#9502](https://github.com/ClickHouse/ClickHouse/issues/9502). [#21509](https://github.com/ClickHouse/ClickHouse/pull/21509) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Forbid dropping a column if it's referenced by a materialized view. Closes [#21164](https://github.com/ClickHouse/ClickHouse/issues/21164). [#21303](https://github.com/ClickHouse/ClickHouse/pull/21303) ([flynn](https://github.com/ucasFL)).
-* Support dynamic interserver credentials (rotating credentials without downtime). [#14113](https://github.com/ClickHouse/ClickHouse/pull/14113) ([johnskopis](https://github.com/johnskopis)).
-* Add support for Kafka storage with `Arrow` and `ArrowStream` format messages. [#23415](https://github.com/ClickHouse/ClickHouse/pull/23415) ([Chao Ma](https://github.com/godliness)).
-* Fixed a missing semicolon in an exception message. The user may find this exception message unpleasant to read. [#23208](https://github.com/ClickHouse/ClickHouse/pull/23208) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed missing whitespace in some exception messages about the `LowCardinality` type. [#23207](https://github.com/ClickHouse/ClickHouse/pull/23207) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Some values were formatted with center alignment in table cells in the `Markdown` format. Not anymore. [#23096](https://github.com/ClickHouse/ClickHouse/pull/23096) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Remove non-essential details from suggestions in clickhouse-client. This closes [#22158](https://github.com/ClickHouse/ClickHouse/issues/22158). [#23040](https://github.com/ClickHouse/ClickHouse/pull/23040) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Correct calculation of the `bytes_allocated` field in `system.dictionaries` for `sparse_hashed` dictionaries. [#22867](https://github.com/ClickHouse/ClickHouse/pull/22867) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed approximate total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)).
-* Fix the case when it was possible to configure a dictionary with a clickhouse source that pointed to itself, which leads to an infinite loop. Closes [#14314](https://github.com/ClickHouse/ClickHouse/issues/14314). [#22479](https://github.com/ClickHouse/ClickHouse/pull/22479) ([Maksim Kita](https://github.com/kitaisreal)).
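A small illustration of the `unhex` case-insensitivity noted above:

```sql
-- Both spellings now resolve to the same function, as in MySQL:
SELECT unhex('414243'), UNHEX('414243');  -- 'ABC' twice
```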
-
-#### Bug Fix
-
-* Multiple fixes for hedged requests. Fixed an error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` when the setting `use_hedged_requests` is enabled. Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). Fixed a race condition in hedged connections which leads to a crash. This fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)). Fix a possible crash in case an `unknown packet` was received from a remote query (with `async_socket_for_remote` enabled). Fixes [#21167](https://github.com/ClickHouse/ClickHouse/issues/21167). [#23309](https://github.com/ClickHouse/ClickHouse/pull/23309) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fixed the behavior when disabling the `input_format_with_names_use_header` setting discards all the input with the CSVWithNames format. This fixes [#22406](https://github.com/ClickHouse/ClickHouse/issues/22406). [#23202](https://github.com/ClickHouse/ClickHouse/pull/23202) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fixed a remote JDBC bridge connection timeout issue. Closes [#9609](https://github.com/ClickHouse/ClickHouse/issues/9609). [#23771](https://github.com/ClickHouse/ClickHouse/pull/23771) ([Maksim Kita](https://github.com/kitaisreal), [alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix the logic of the initial load of `complex_key_hashed` if `update_field` is specified. Closes [#23800](https://github.com/ClickHouse/ClickHouse/issues/23800). [#23824](https://github.com/ClickHouse/ClickHouse/pull/23824) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fixed a crash when `PREWHERE` and a row policy filter are both in effect with an empty result. [#23763](https://github.com/ClickHouse/ClickHouse/pull/23763) ([Amos Bird](https://github.com/amosbird)).
-* Avoid a possible "Cannot schedule a task" error (in case some exception has occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)).
-* Added an exception in case of completely the same values in both samples in the aggregate function `mannWhitneyUTest`. This fixes [#23646](https://github.com/ClickHouse/ClickHouse/issues/23646). [#23654](https://github.com/ClickHouse/ClickHouse/pull/23654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fixed a server fault when inserting data through HTTP caused an exception. This fixes [#23512](https://github.com/ClickHouse/ClickHouse/issues/23512). [#23643](https://github.com/ClickHouse/ClickHouse/pull/23643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fixed misinterpretation of some `LIKE` expressions with escape sequences. [#23610](https://github.com/ClickHouse/ClickHouse/pull/23610) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed restart/stop command hanging. Closes [#20214](https://github.com/ClickHouse/ClickHouse/issues/20214). [#23552](https://github.com/ClickHouse/ClickHouse/pull/23552) ([filimonov](https://github.com/filimonov)).
-* Fixed the `COLUMNS` matcher in case of multiple JOINs in a select query. Closes [#22736](https://github.com/ClickHouse/ClickHouse/issues/22736). [#23501](https://github.com/ClickHouse/ClickHouse/pull/23501) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fixed a crash when modifying a column's default value when the column itself is used as `ReplacingMergeTree`'s parameter. [#23483](https://github.com/ClickHouse/ClickHouse/pull/23483) ([hexiaoting](https://github.com/hexiaoting)).
-* Fixed corner cases in vertical merges with `ReplacingMergeTree`. In rare cases they could lead to failed merges with exceptions like `Incomplete granules are not allowed while blocks are granules size`. [#23459](https://github.com/ClickHouse/ClickHouse/pull/23459) ([Anton Popov](https://github.com/CurtizJ)).
-* Fixed a bug that did not allow casting from an empty array literal to an array with dimensions greater than 1, e.g. `CAST([] AS Array(Array(String)))`. Closes [#14476](https://github.com/ClickHouse/ClickHouse/issues/14476). [#23456](https://github.com/ClickHouse/ClickHouse/pull/23456) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fixed a bug when the `deltaSum` aggregate function produced an incorrect result after resetting the counter. [#23437](https://github.com/ClickHouse/ClickHouse/pull/23437) ([Russ Frank](https://github.com/rf)).
-* Fixed a `Cannot unlink file` error on unsuccessful creation of a ReplicatedMergeTree table with a multidisk configuration. This closes [#21755](https://github.com/ClickHouse/ClickHouse/issues/21755). [#23433](https://github.com/ClickHouse/ClickHouse/pull/23433) ([tavplubix](https://github.com/tavplubix)).
-* Fixed incompatible constant expression generation during partition pruning based on virtual columns. This fixes https://github.com/ClickHouse/ClickHouse/pull/21401#discussion_r611888913. [#23366](https://github.com/ClickHouse/ClickHouse/pull/23366) ([Amos Bird](https://github.com/amosbird)).
-* Fixed a crash when the setting `join_algorithm` is set to 'auto' and a Join is performed with a Dictionary. Close [#23002](https://github.com/ClickHouse/ClickHouse/issues/23002). [#23312](https://github.com/ClickHouse/ClickHouse/pull/23312) ([Vladimir](https://github.com/vdimir)).
-* Don't relax NOT conditions during partition pruning. This fixes [#23305](https://github.com/ClickHouse/ClickHouse/issues/23305) and [#21539](https://github.com/ClickHouse/ClickHouse/issues/21539). [#23310](https://github.com/ClickHouse/ClickHouse/pull/23310) ([Amos Bird](https://github.com/amosbird)).
-* Fixed a very rare race condition on background cleanup of old blocks. It might cause a block not to be deduplicated if it's too close to the end of the deduplication window. [#23301](https://github.com/ClickHouse/ClickHouse/pull/23301) ([tavplubix](https://github.com/tavplubix)).
-* Fixed a very rare (distributed) race condition between creation and removal of ReplicatedMergeTree tables. It might cause exceptions like `node doesn't exist` on an attempt to create a replicated table. Fixes [#21419](https://github.com/ClickHouse/ClickHouse/issues/21419). [#23294](https://github.com/ClickHouse/ClickHouse/pull/23294) ([tavplubix](https://github.com/tavplubix)).
-* Fixed simple-key dictionary creation from DDL if the primary key is not the first attribute. Fixes [#23236](https://github.com/ClickHouse/ClickHouse/issues/23236). [#23262](https://github.com/ClickHouse/ClickHouse/pull/23262) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fixed reading from ODBC when there are many long column names in a table. Closes [#8853](https://github.com/ClickHouse/ClickHouse/issues/8853). [#23215](https://github.com/ClickHouse/ClickHouse/pull/23215) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* MaterializeMySQL (experimental feature): fixed a `Not found column` error when selecting from `MaterializeMySQL` with a condition on a key column. Fixes [#22432](https://github.com/ClickHouse/ClickHouse/issues/22432). [#23200](https://github.com/ClickHouse/ClickHouse/pull/23200) ([tavplubix](https://github.com/tavplubix)).
-* Correct alias handling if a subquery was optimized to a constant. Fixes [#22924](https://github.com/ClickHouse/ClickHouse/issues/22924). Fixes [#10401](https://github.com/ClickHouse/ClickHouse/issues/10401). [#23191](https://github.com/ClickHouse/ClickHouse/pull/23191) ([Maksim Kita](https://github.com/kitaisreal)).
-* The server might fail to start if the `data_type_default_nullable` setting is enabled in the default profile; it's fixed. Fixes [#22573](https://github.com/ClickHouse/ClickHouse/issues/22573). [#23185](https://github.com/ClickHouse/ClickHouse/pull/23185) ([tavplubix](https://github.com/tavplubix)).
-* Fixed a crash on shutdown which happened because of wrong accounting of current connections. [#23154](https://github.com/ClickHouse/ClickHouse/pull/23154) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fixed a `Table .inner_id... doesn't exist` error when selecting from a Materialized View after detaching it from an Atomic database and attaching it back. [#23047](https://github.com/ClickHouse/ClickHouse/pull/23047) ([tavplubix](https://github.com/tavplubix)).
-* Fix the error `Cannot find column in ActionsDAG result` which may happen if a subquery uses `untuple`. Fixes [#22290](https://github.com/ClickHouse/ClickHouse/issues/22290). [#22991](https://github.com/ClickHouse/ClickHouse/pull/22991) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix usage of constant columns of type `Map` with nullable values. [#22939](https://github.com/ClickHouse/ClickHouse/pull/22939) ([Anton Popov](https://github.com/CurtizJ)).
-* Fixed `formatDateTime()` on `DateTime64` and the "%C" format specifier; fixed `toDateTime64()` for large values and non-zero scale. [#22937](https://github.com/ClickHouse/ClickHouse/pull/22937) ([Vasily Nemkov](https://github.com/Enmk)).
-* Fixed a crash when using `mannWhitneyUTest` and `rankCorr` with window functions. This fixes [#22728](https://github.com/ClickHouse/ClickHouse/issues/22728). [#22876](https://github.com/ClickHouse/ClickHouse/pull/22876) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* LIVE VIEW (experimental feature): fixed possible hanging in concurrent DROP/CREATE of TEMPORARY LIVE VIEW in `TemporaryLiveViewCleaner`, [see](https://gist.github.com/vzakaznikov/0c03195960fc86b56bfe2bc73a90019e). [#22858](https://github.com/ClickHouse/ClickHouse/pull/22858) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fixed pushdown of `HAVING` in the case when the filter column is used in aggregation. [#22763](https://github.com/ClickHouse/ClickHouse/pull/22763) ([Anton Popov](https://github.com/CurtizJ)).
-* Fixed possible hangs in ZooKeeper requests in case of an OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fixed waiting for mutations on several replicas for ReplicatedMergeTree table engines. Previously, a mutation/alter query could finish before the mutation was actually executed on other replicas. [#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)).
-* Fixed an exception for Log with nested types without columns in the SELECT clause. [#22654](https://github.com/ClickHouse/ClickHouse/pull/22654) ([Azat Khuzhin](https://github.com/azat)).
-* Fix unlimited wait for auxiliary AWS requests. [#22594](https://github.com/ClickHouse/ClickHouse/pull/22594) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Fixed a crash when a client closes the connection very early [#22579](https://github.com/ClickHouse/ClickHouse/issues/22579). [#22591](https://github.com/ClickHouse/ClickHouse/pull/22591) ([nvartolomei](https://github.com/nvartolomei)).
-* `Map` data type (experimental feature): fixed incorrect formatting of the function `map` in distributed queries. [#22588](https://github.com/ClickHouse/ClickHouse/pull/22588) ([foolchi](https://github.com/foolchi)).
-* Fixed deserialization of an empty string without a newline at the end of the TSV format. This closes [#20244](https://github.com/ClickHouse/ClickHouse/issues/20244). Possible workaround without a version update: set `input_format_null_as_default` to zero. It was zero in old versions. [#22527](https://github.com/ClickHouse/ClickHouse/pull/22527) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fixed a wrong cast of a column of `LowCardinality` type in the Merge Join algorithm. Close [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), close [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). [#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir](https://github.com/vdimir)).
-* A buffer overflow (on read) was possible in the `tokenbf_v1` full-text index. The excessive bytes are not used but the read operation may lead to a crash in rare cases. This closes [#19233](https://github.com/ClickHouse/ClickHouse/issues/19233). [#22421](https://github.com/ClickHouse/ClickHouse/pull/22421) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Do not limit HTTP chunk size. Fixes [#21907](https://github.com/ClickHouse/ClickHouse/issues/21907). [#22322](https://github.com/ClickHouse/ClickHouse/pull/22322) ([Ivan](https://github.com/abyss7)).
-* Fixed a bug which leads to underaggregation of data when `optimize_aggregation_in_order` is enabled and there are many parts in the table. Slightly improve performance of aggregation with `optimize_aggregation_in_order` enabled. [#21889](https://github.com/ClickHouse/ClickHouse/pull/21889) ([Anton Popov](https://github.com/CurtizJ)).
-* Check if the table function `view` is used as a column. This complements #20350. [#21465](https://github.com/ClickHouse/ClickHouse/pull/21465) ([Amos Bird](https://github.com/amosbird)).
-* Fix "unknown column" error for tables with `Merge` engine in queries with `JOIN` and aggregation. Closes [#18368](https://github.com/ClickHouse/ClickHouse/issues/18368), close [#22226](https://github.com/ClickHouse/ClickHouse/issues/22226). [#21370](https://github.com/ClickHouse/ClickHouse/pull/21370) ([Vladimir](https://github.com/vdimir)).
-* Fixed name clashes in pushdown optimization. It caused incorrect `WHERE` filtration after FULL JOIN. Close [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)).
-* Fixed a very rare bug when a quorum insert with `quorum_parallel=1` is not really a "quorum" insert because of deduplication. [#18215](https://github.com/ClickHouse/ClickHouse/pull/18215) ([filimonov](https://github.com/filimonov) - reported, [alesapin](https://github.com/alesapin) - fixed).
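A worked example of the empty-array-literal cast fixed in the list above, taken directly from the entry:

```sql
-- Previously this cast failed; now it returns an empty nested array:
SELECT CAST([] AS Array(Array(String)));
```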
-* Build `jemalloc` with support for [heap profiling](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Heap-Profiling). [#22834](https://github.com/ClickHouse/ClickHouse/pull/22834) ([nvartolomei](https://github.com/nvartolomei)).
-* Avoid UB in `*Log` engines for rwlock unlock due to unlock from another thread. [#22583](https://github.com/ClickHouse/ClickHouse/pull/22583) ([Azat Khuzhin](https://github.com/azat)).
-* Fixed UB by unlocking the rwlock of the TinyLog from the same thread. [#22560](https://github.com/ClickHouse/ClickHouse/pull/22560) ([Azat Khuzhin](https://github.com/azat)).
-
-
-## ClickHouse release 21.4
-
-### ClickHouse release 21.4.1 2021-04-12
-
-#### Backward Incompatible Change
-
-* The `toStartOfInterval` function will align hour intervals to midnight (in previous versions they were aligned to the start of the Unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: `00:00:00..10:59:59`, `11:00:00..21:59:59` and `22:00:00..23:59:59`. This behaviour is more suited for practical needs (see the example after this list). This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* `Age` and `Precision` in graphite rollup configs should increase from retention to retention. Now this is checked and a wrong config raises an exception. [#21496](https://github.com/ClickHouse/ClickHouse/pull/21496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
-* Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning a wrong result for 3+ level domains present in a custom top-level domain list. For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)).
-* Column `keys` in table `system.dictionaries` was replaced with columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from the `system.dictionaries` table do not require the dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)).
-* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched for and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**:
-  * `ATTACH PART[ITION]` queries may not work during cluster upgrade.
-  * It's not possible to roll back to an older ClickHouse version after executing an `ALTER ... ATTACH` query in the new version, as the old servers would fail to process the `ATTACH_PART` entry in the replicated log.
-* In this version, an empty `remote_url_allow_hosts` element will block all access to remote hosts, while in previous versions it did nothing. If you want to keep the old behaviour and you have an empty `remote_url_allow_hosts` element in the configuration file, remove it. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)).
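A minimal sketch of the new `toStartOfInterval` alignment described in the first entry of the list above; the timestamp is illustrative, and the result follows the day split documented there:

```sql
-- 11-hour intervals are now aligned to midnight, so a day is split
-- at 00:00, 11:00 and 22:00 instead of being aligned to the Unix epoch.
SELECT toStartOfInterval(toDateTime('2021-04-01 12:34:56'), INTERVAL 11 HOUR);
-- returns 2021-04-01 11:00:00
```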
-
-
-#### New Feature
-
-* Extended the range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around the zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)). Not all time and date functions work for the extended range of dates.
-* Added support of Kerberos authentication for preconfigured users and HTTP requests (GSS-SPNEGO). [#14995](https://github.com/ClickHouse/ClickHouse/pull/14995) ([Denis Glazachev](https://github.com/traceon)).
-* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. It is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)).
-* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. A zero `level` value is equivalent to infinity. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added `executable_pool` dictionary source. Closes [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added table function `dictionary`. It works the same way as the `Dictionary` engine. Closes [#21560](https://github.com/ClickHouse/ClickHouse/issues/21560). [#21910](https://github.com/ClickHouse/ClickHouse/pull/21910) ([Maksim Kita](https://github.com/kitaisreal)).
-* Support `Nullable` type for the `PolygonDictionary` attribute. [#21890](https://github.com/ClickHouse/ClickHouse/pull/21890) ([Maksim Kita](https://github.com/kitaisreal)).
-* Functions `dictGet`, `dictHas` use the current database name if it is not specified for dictionaries created with DDL. Closes [#21632](https://github.com/ClickHouse/ClickHouse/issues/21632). [#21859](https://github.com/ClickHouse/ClickHouse/pull/21859) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added function `dictGetOrNull`. It works like `dictGet`, but returns `NULL` if the key was not found in the dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added async update in `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for the `Nullable` type in `Cache`, `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for fetching multiple attributes with the `dictGet`, `dictGetOrDefault` functions. Fixes [#21517](https://github.com/ClickHouse/ClickHouse/issues/21517). [#20595](https://github.com/ClickHouse/ClickHouse/pull/20595) ([Maksim Kita](https://github.com/kitaisreal)).
-* Support the `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add function `timezoneOf` that returns the timezone name of the `DateTime` or `DateTime64` data types. This does not close [#9959](https://github.com/ClickHouse/ClickHouse/issues/9959). Fix inconsistencies in function names: add aliases `timezone` and `timeZone` as well as `toTimezone` and `toTimeZone` and `timezoneOf` and `timeZoneOf`. [#22001](https://github.com/ClickHouse/ClickHouse/pull/22001) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add a new optional clause `GRANTEES` for `CREATE/ALTER USER` commands. It specifies users or roles which are allowed to receive grants from this user, on the condition that this user also has all the required access granted with grant option. By default `GRANTEES ANY` is used, which means a user with grant option can grant to anyone. Syntax: `CREATE USER ... GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]`. [#21641](https://github.com/ClickHouse/ClickHouse/pull/21641) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Add a new column `slowdowns_count` to `system.clusters`. When using hedged requests, it shows how many times we switched to another replica because this replica was responding slowly. Also show the actual value of `errors_count` in `system.clusters`. [#21480](https://github.com/ClickHouse/ClickHouse/pull/21480) ([Kruglov Pavel](https://github.com/Avogar)).
-* Add a `_partition_id` virtual column for `MergeTree*` engines. Allow to prune partitions by `_partition_id`. Add a `partitionID()` function to calculate the partition id string. [#21401](https://github.com/ClickHouse/ClickHouse/pull/21401) ([Amos Bird](https://github.com/amosbird)).
-* Add function `isIPAddressInRange` to test if an IPv4 or IPv6 address is contained in a given CIDR network prefix. [#21329](https://github.com/ClickHouse/ClickHouse/pull/21329) ([PHO](https://github.com/depressed-pho)).
-* Added a new SQL command `ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'`. This command is needed to properly remove 'frozen' partitions from all disks. [#21142](https://github.com/ClickHouse/ClickHouse/pull/21142) ([Pavel Kovalenko](https://github.com/Jokser)).
-* Support implicit key type conversion for JOIN. [#19885](https://github.com/ClickHouse/ClickHouse/pull/19885) ([Vladimir](https://github.com/vdimir)).
-
-#### Experimental Feature
-
-* Support the `RANGE OFFSET` frame (for window functions) for floating point types. Implement `lagInFrame`/`leadInFrame` window functions, which are analogous to `lag`/`lead`, but respect the window frame; they are identical when the frame is `between unbounded preceding and unbounded following` (see the sketch below). This closes [#5485](https://github.com/ClickHouse/ClickHouse/issues/5485). [#21895](https://github.com/ClickHouse/ClickHouse/pull/21895) ([Alexander Kuzmenkov](https://github.com/akuzm)).
-* Zero-copy replication for `ReplicatedMergeTree` over S3 storage. [#16240](https://github.com/ClickHouse/ClickHouse/pull/16240) ([ianton-ru](https://github.com/ianton-ru)).
-* Added the possibility to migrate an existing S3 disk to the schema with backup-restore capabilities. [#22070](https://github.com/ClickHouse/ClickHouse/pull/22070) ([Pavel Kovalenko](https://github.com/Jokser)).
-
-#### Performance Improvement
-
-* Supported parallel formatting in `clickhouse-local` and everywhere else. [#21630](https://github.com/ClickHouse/ClickHouse/pull/21630) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
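A short sketch of `lagInFrame` from the experimental-feature entry above (window functions are experimental in this release, hence the setting):

```sql
-- With an unbounded frame, lagInFrame behaves exactly like the usual lag().
SELECT
    number,
    lagInFrame(number) OVER (
        ORDER BY number
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS prev
FROM numbers(5)
SETTINGS allow_experimental_window_functions = 1;
```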
-* Support parallel parsing for the `CSVWithNames` and `TSVWithNames` formats. This closes [#21085](https://github.com/ClickHouse/ClickHouse/issues/21085). [#21149](https://github.com/ClickHouse/ClickHouse/pull/21149) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Enable read with mmap IO for file ranges from 64 MiB (the setting `min_bytes_to_use_mmap_io`). It may lead to a moderate performance improvement. [#22326](https://github.com/ClickHouse/ClickHouse/pull/22326) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add a cache for files read with the `min_bytes_to_use_mmap_io` setting. It makes a significant (2x and more) performance improvement when the value of the setting is small, by avoiding frequent mmap/munmap calls and the consequent page faults. Note that mmap IO has major drawbacks that make it less reliable in production (e.g. hangs or SIGBUS on faulty disks; less controllable memory usage). Nevertheless it is good in benchmarks. [#22206](https://github.com/ClickHouse/ClickHouse/pull/22206) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Avoid unnecessary data copying when using codec `NONE`. Please note that codec `NONE` is mostly useless - it's recommended to always use compression (`LZ4` is the default). Despite the common belief, disabling compression may not improve performance (the opposite effect is possible). The `NONE` codec is useful in some cases: - when data is incompressible; - for synthetic benchmarks. [#22145](https://github.com/ClickHouse/ClickHouse/pull/22145) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Faster `GROUP BY` with small `max_rows_to_group_by` and `group_by_overflow_mode='any'`. [#21856](https://github.com/ClickHouse/ClickHouse/pull/21856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Optimize performance of queries like `SELECT ... FINAL ... WHERE`. Now in queries with `FINAL` it's allowed to move columns that are in the sorting key to `PREWHERE`. [#21830](https://github.com/ClickHouse/ClickHouse/pull/21830) ([foolchi](https://github.com/foolchi)).
-* Improved performance by replacing `memcpy` with another implementation. This closes [#18583](https://github.com/ClickHouse/ClickHouse/issues/18583). [#21520](https://github.com/ClickHouse/ClickHouse/pull/21520) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improve performance of aggregation in order of the sorting key (with the setting `optimize_aggregation_in_order` enabled). [#19401](https://github.com/ClickHouse/ClickHouse/pull/19401) ([Anton Popov](https://github.com/CurtizJ)).
-
-#### Improvement
-
-* Add a connection pool for the PostgreSQL table/database engine and dictionary source. Should fix [#21444](https://github.com/ClickHouse/ClickHouse/issues/21444). [#21839](https://github.com/ClickHouse/ClickHouse/pull/21839) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Support a non-default table schema for the postgres storage/table-function. Closes [#21701](https://github.com/ClickHouse/ClickHouse/issues/21701). [#21711](https://github.com/ClickHouse/ClickHouse/pull/21711) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Support replica priorities for the postgres dictionary source. [#21710](https://github.com/ClickHouse/ClickHouse/pull/21710) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Introduce a new merge tree setting `min_bytes_to_rebalance_partition_over_jbod` which allows assigning new parts to different disks of a JBOD volume in a balanced way. [#16481](https://github.com/ClickHouse/ClickHouse/pull/16481) ([Amos Bird](https://github.com/amosbird)).
-* Added `Grant`, `Revoke` and `System` values of the `query_kind` column for corresponding queries in `system.query_log`. [#21102](https://github.com/ClickHouse/ClickHouse/pull/21102) ([Vasily Nemkov](https://github.com/Enmk)).
-* Allow customizing timeouts for HTTP connections used for replication independently from other HTTP timeouts. [#20088](https://github.com/ClickHouse/ClickHouse/pull/20088) ([nvartolomei](https://github.com/nvartolomei)).
-* Better exception message in the client in case of an exception while the server is writing blocks. In previous versions the client may get a misleading message like `Data compressed with different methods`. [#22427](https://github.com/ClickHouse/ClickHouse/pull/22427) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix error `Directory tmp_fetch_XXX already exists`, which could happen after a failed part fetch. Delete the temporary fetch directory if it already exists. Fixes [#14197](https://github.com/ClickHouse/ClickHouse/issues/14197). [#22411](https://github.com/ClickHouse/ClickHouse/pull/22411) ([nvartolomei](https://github.com/nvartolomei)).
-* Fix MSan report for function `range` with a `UInt256` argument (support for large integers is experimental). This closes [#22157](https://github.com/ClickHouse/ClickHouse/issues/22157). [#22387](https://github.com/ClickHouse/ClickHouse/pull/22387) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add a `current_database` column to the `system.processes` table. It contains the current database of the query. [#22365](https://github.com/ClickHouse/ClickHouse/pull/22365) ([Alexander Kuzmenkov](https://github.com/akuzm)).
-* Add case-insensitive history search/navigation and subword movement features to `clickhouse-client`. [#22105](https://github.com/ClickHouse/ClickHouse/pull/22105) ([Amos Bird](https://github.com/amosbird)).
-* If a tuple of NULLs, e.g. `(NULL, NULL)`, is on the left-hand side of the `IN` operator with tuples of non-NULLs on the right-hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))`, return 0 instead of throwing an exception about incompatible types. The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Update the used version of simdjson to 0.9.1. This fixes [#21984](https://github.com/ClickHouse/ClickHouse/issues/21984). [#22057](https://github.com/ClickHouse/ClickHouse/pull/22057) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Added case-insensitive aliases for the `CONNECTION_ID()` and `VERSION()` functions. This fixes [#22028](https://github.com/ClickHouse/ClickHouse/issues/22028). [#22042](https://github.com/ClickHouse/ClickHouse/pull/22042) ([Eugene Klimov](https://github.com/Slach)).
-* Add option `strict_increase` to the `windowFunnel` function to calculate each event once (see the sketch below; resolves [#21835](https://github.com/ClickHouse/ClickHouse/issues/21835)). [#22025](https://github.com/ClickHouse/ClickHouse/pull/22025) ([Vladimir](https://github.com/vdimir)).
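A hedged sketch of the new `strict_increase` mode for `windowFunnel`; the `events` table and its columns are hypothetical:

```sql
-- With 'strict_increase', conditions only advance on strictly increasing
-- timestamps, so a repeated event is counted once within the 3600 s window.
SELECT
    user_id,
    windowFunnel(3600, 'strict_increase')(
        event_time,
        event = 'view',
        event = 'click',
        event = 'purchase'
    ) AS level
FROM events
GROUP BY user_id;
```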
-* If the partition key of a `MergeTree` table does not include `Date` or `DateTime` columns but includes exactly one `DateTime64` column, expose its values in the `min_time` and `max_time` columns in the `system.parts` and `system.parts_columns` tables. Add `min_time` and `max_time` columns to the `system.parts_columns` table (this was an inconsistency with the `system.parts` table). This closes [#18244](https://github.com/ClickHouse/ClickHouse/issues/18244). [#22011](https://github.com/ClickHouse/ClickHouse/pull/22011) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Supported the `replication_alter_partitions_sync=1` setting in `clickhouse-copier` for moving partitions from the helping table to the destination. Decreased default timeouts. Fixes [#21911](https://github.com/ClickHouse/ClickHouse/issues/21911). [#21912](https://github.com/ClickHouse/ClickHouse/pull/21912) ([turbo jason](https://github.com/songenjie)).
-* Show the path to the data directory of `EmbeddedRocksDB` tables in system tables. [#21903](https://github.com/ClickHouse/ClickHouse/pull/21903) ([tavplubix](https://github.com/tavplubix)).
-* Add profile event `HedgedRequestsChangeReplica`, change the read data timeout from seconds to milliseconds. [#21886](https://github.com/ClickHouse/ClickHouse/pull/21886) ([Kruglov Pavel](https://github.com/Avogar)).
-* DiskS3 (experimental feature under development). Fixed a bug that made it impossible to move a directory if the destination is not empty and a cache disk is used. [#21837](https://github.com/ClickHouse/ClickHouse/pull/21837) ([Pavel Kovalenko](https://github.com/Jokser)).
-* Better formatting for the `Array` and `Map` data types in the Web UI. [#21798](https://github.com/ClickHouse/ClickHouse/pull/21798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Update clusters only if their configurations were updated. [#21685](https://github.com/ClickHouse/ClickHouse/pull/21685) ([Kruglov Pavel](https://github.com/Avogar)).
-* Propagate query and session settings for distributed DDL queries. Set `distributed_ddl_entry_format_version` to 2 to enable this. Added the `distributed_ddl_output_mode` setting. Supported modes: `none`, `throw` (default), `null_status_on_timeout` and `never_throw`. Miscellaneous fixes and improvements for the `Replicated` database engine. [#21535](https://github.com/ClickHouse/ClickHouse/pull/21535) ([tavplubix](https://github.com/tavplubix)).
-* If `PODArray` was instantiated with an element size that is neither a fraction nor a multiple of 16, a buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add `last_error_time`/`last_error_message`/`last_error_stacktrace`/`remote` columns for `system.errors`. [#21529](https://github.com/ClickHouse/ClickHouse/pull/21529) ([Azat Khuzhin](https://github.com/azat)).
-* Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes #21383. [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)).
-* Add setting `optimize_skip_unused_shards_limit` to limit the number of sharding key values for `optimize_skip_unused_shards`. [#21512](https://github.com/ClickHouse/ClickHouse/pull/21512) ([Azat Khuzhin](https://github.com/azat)).
-* Improve `clickhouse-format` to not throw an exception when there are extra spaces or a comment after the last query, and to throw an exception early with a readable message when formatting `ASTInsertQuery` with data. [#21311](https://github.com/ClickHouse/ClickHouse/pull/21311) ([flynn](https://github.com/ucasFL)).
-* Improve support of integer keys in the data type `Map` (see the example below). [#21157](https://github.com/ClickHouse/ClickHouse/pull/21157) ([Anton Popov](https://github.com/CurtizJ)).
-* MaterializeMySQL: attempt to reconnect to MySQL if the connection is lost. [#20961](https://github.com/ClickHouse/ClickHouse/pull/20961) ([Håvard Kvålen](https://github.com/havardk)).
-* Support more cases to rewrite `CROSS JOIN` to `INNER JOIN`. [#20392](https://github.com/ClickHouse/ClickHouse/pull/20392) ([Vladimir](https://github.com/vdimir)).
-* Do not create empty parts on INSERT when the `optimize_on_insert` setting is enabled. Fixes [#20304](https://github.com/ClickHouse/ClickHouse/issues/20304). [#20387](https://github.com/ClickHouse/ClickHouse/pull/20387) ([Kruglov Pavel](https://github.com/Avogar)).
-* `MaterializeMySQL`: add a minmax skipping index for the `_version` column. [#20382](https://github.com/ClickHouse/ClickHouse/pull/20382) ([Stig Bakken](https://github.com/stigsb)).
-* Add option `--backslash` for `clickhouse-format`, which can add a backslash at the end of each line of the formatted query. [#21494](https://github.com/ClickHouse/ClickHouse/pull/21494) ([flynn](https://github.com/ucasFL)).
-* Now ClickHouse will not throw a `LOGICAL_ERROR` exception when we try to mutate an already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)).
-
-#### Bug Fix
-
-* Remove the socket from epoll before cancelling the packet receiver in `HedgedConnections` to prevent a possible race. Fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)).
-* Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the result set contains very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix an exception which may happen when `SELECT` has a constant `WHERE` condition and the source table has columns whose names are digits. [#22270](https://github.com/ClickHouse/ClickHouse/pull/22270) ([LiuNeng](https://github.com/liuneng1994)).
-* Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)).
-* Fix an uncaught exception in `InterserverIOHTTPHandler`. [#22146](https://github.com/ClickHouse/ClickHouse/pull/22146) ([Azat Khuzhin](https://github.com/azat)).
-* Fix the docker entrypoint in case `http_port` is not in the config. [#22132](https://github.com/ClickHouse/ClickHouse/pull/22132) ([Ewout](https://github.com/devwout)).
-* Fix error `Invalid number of rows in Chunk` in `JOIN` with `TOTALS` and `arrayJoin`. Closes [#19303](https://github.com/ClickHouse/ClickHouse/issues/19303). [#22129](https://github.com/ClickHouse/ClickHouse/pull/22129) ([Vladimir](https://github.com/vdimir)).
-* Fix the name of the background thread pool used to poll messages from Kafka. With the broken thread pool, the Kafka engine would not consume messages from the message queue. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)).
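Illustrating the improved integer-key support in the experimental `Map` type from the first entry above (assumes `allow_experimental_map_type = 1` on this release):

```sql
SELECT map(1, 'a', 2, 'b') AS m, m[2] AS value
SETTINGS allow_experimental_map_type = 1;
-- value = 'b'
```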
-* Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)).
-* Disable `async_socket_for_remote`/`use_hedged_requests` for buggy Linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)).
-* Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)).
-* The function `decrypt` was lacking a check for the minimal size of data encrypted in `AEAD` mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* In a rare case, a merge for `CollapsingMergeTree` may create a granule with `index_granularity + 1` rows. Because of this, an internal check added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3) may fail with the error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454), which could cause a significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)).
-* Prevent hedged connections overlaps (`Unknown packet 9 from server` error). [#21941](https://github.com/ClickHouse/ClickHouse/pull/21941) ([Azat Khuzhin](https://github.com/azat)).
-* Fix reading the HTTP POST request with "multipart/form-data" content type in some cases. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)).
-* Fix wrong `ORDER BY` results when a query contains window functions, and optimization for reading in primary key order is applied. Fixes [#21828](https://github.com/ClickHouse/ClickHouse/issues/21828). [#21915](https://github.com/ClickHouse/ClickHouse/pull/21915) ([Alexander Kuzmenkov](https://github.com/akuzm)).
-* Fix deadlock in the first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix incorrect query result (and possible crash) which could happen when a `WHERE` or `HAVING` condition is pushed before `GROUP BY`. Fixes [#21773](https://github.com/ClickHouse/ClickHouse/issues/21773). [#21841](https://github.com/ClickHouse/ClickHouse/pull/21841) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Better error handling and logging in `WriteBufferFromS3`. [#21836](https://github.com/ClickHouse/ClickHouse/pull/21836) ([Pavel Kovalenko](https://github.com/Jokser)).
-* Fix possible crashes in aggregate functions with the combinator `Distinct` while using two-level aggregation. This is a follow-up fix of [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365). It could only be reproduced in a production environment. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)).
-* Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717), which was introduced in [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896). [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)).
-* Fix a bug for `ReplicatedMerge` table engines when an `ALTER MODIFY COLUMN` query doesn't change the type of a `Decimal` column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)).
-* Fix possible infinite waiting when concurrent `OPTIMIZE` and `DROP` are run for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)).
-* Fix function `arrayElement` with type `Map` for constant integer arguments. [#21699](https://github.com/ClickHouse/ClickHouse/pull/21699) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix SIGSEGV on non-existing attributes from `ip_trie` with `access_to_key_from_attributes`. [#21692](https://github.com/ClickHouse/ClickHouse/pull/21692) ([Azat Khuzhin](https://github.com/azat)).
-* The server now starts accepting connections only after `DDLWorker` and dictionaries initialization. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)).
-* Add type conversion for keys of tables of type `Join` (previously it led to SIGSEGV). [#21646](https://github.com/ClickHouse/ClickHouse/pull/21646) ([Azat Khuzhin](https://github.com/azat)).
-* Fix distributed request cancellation (for example, a simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)).
-* Fix `fsync_part_directory` for horizontal merge. [#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)).
-* Remove unknown columns from the joined table in `WHERE` for queries to external database engines (MySQL, PostgreSQL). Closes [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), closes [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), closes [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir](https://github.com/vdimir)).
-* `std::terminate` was called if there was an error writing data to S3. [#21624](https://github.com/ClickHouse/ClickHouse/pull/21624) ([Vladimir](https://github.com/vdimir)).
-* Fix possible error `Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)).
-* If a query had a constant `WHERE` condition and the setting `optimize_skip_unused_shards` was enabled, all shards could be skipped and the query could return an incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)).
-* Fix table function `clusterAllReplicas` returning a wrong `_shard_num` (see the example below). Closes [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasFL)).
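For reference, the `_shard_num` virtual column fixed in the last entry above can be inspected like this (the cluster name `default` is an assumption):

```sql
SELECT _shard_num, hostName()
FROM clusterAllReplicas('default', system.one);
-- before the fix, _shard_num could be reported incorrectly for replicas
```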
-* Fix S3 tables holding old credentials after a config update. [#21457](https://github.com/ClickHouse/ClickHouse/pull/21457) ([Grigory Pervakov](https://github.com/GrigoryPervakov)).
-* Fixed a race on the SSL object inside `SecureSocket` in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix `Avro` format parsing for `Kafka`. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)).
-* Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)).
-* The `force_drop_table` flag didn't work for `MATERIALIZED VIEW`; it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([tavplubix](https://github.com/tavplubix)).
-* Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after full join. Closes [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)).
-
-#### Build/Testing/Packaging Improvement
-
-* Add [Jepsen](https://github.com/jepsen-io/jepsen) tests for ClickHouse Keeper. [#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)).
-* Run stateless tests in parallel in CI. Depends on [#22181](https://github.com/ClickHouse/ClickHouse/issues/22181). [#22300](https://github.com/ClickHouse/ClickHouse/pull/22300) ([alesapin](https://github.com/alesapin)).
-* Enable status check for the [SQLancer](https://github.com/sqlancer/sqlancer) CI run. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)).
-* Multiple preparations for PowerPC builds: Enable the bundled openldap on `ppc64le`. [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable compiling on `ppc64le` with Clang. [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix compiling boost on `ppc64le`. [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix CMake error about the internal CMake variable `CMAKE_ASM_COMPILE_OBJECT` not set on `ppc64le`. [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix Fedora/RHEL/CentOS not finding `libclang_rt.builtins` on `ppc64le`. [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable building with `jemalloc` on `ppc64le`. [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix ClickHouse's config embedding and cctz's timezone embedding on `ppc64le`. [#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). Fixed compiling on `ppc64le` and the use of the correct instruction pointer register on `ppc64le`. [#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)).
-* Re-enable the S3 (AWS) library on `aarch64`. [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)).
-* Add `tzdata` to Docker containers because reading `ORC` formats requires it. This closes [#14156](https://github.com/ClickHouse/ClickHouse/issues/14156). [#22000](https://github.com/ClickHouse/ClickHouse/pull/22000) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Introduce 2 arguments for the `clickhouse-server` image Dockerfile: `deb_location` & `single_binary_location`. [#21977](https://github.com/ClickHouse/ClickHouse/pull/21977) ([filimonov](https://github.com/filimonov)).
-* Allow using clang-tidy with release builds by enabling assertions if it is used. [#21914](https://github.com/ClickHouse/ClickHouse/pull/21914) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add llvm-12 binaries name to search in cmake scripts. Implicit constants conversions to mute clang warnings. Updated submodules to build with CMake 3.19. Mute recursion in macro expansion in the `readpassphrase` library. Deprecated `-fuse-ld` changed to `--ld-path` for clang. [#21597](https://github.com/ClickHouse/ClickHouse/pull/21597) ([Ilya Yatsishin](https://github.com/qoega)).
-* Updated `docker/test/testflows/runner/dockerd-entrypoint.sh` to use the Yandex dockerhub-proxy, because Docker Hub has enabled very restrictive rate limits. [#21551](https://github.com/ClickHouse/ClickHouse/pull/21551) ([vzakaznikov](https://github.com/vzakaznikov)).
-* Fix macOS shared lib build. [#20184](https://github.com/ClickHouse/ClickHouse/pull/20184) ([nvartolomei](https://github.com/nvartolomei)).
-* Add a `ctime` option to `zookeeper-dump-tree`. It allows to dump node creation time. [#21842](https://github.com/ClickHouse/ClickHouse/pull/21842) ([Ilya](https://github.com/HumanUser)).
-
-
-## ClickHouse release 21.3 (LTS)
-
-### ClickHouse release v21.3, 2021-03-12
-
-#### Backward Incompatible Change
-
-* Now it's not allowed to create MergeTree tables in the old syntax with table TTL because it's just ignored. Attaching old tables is still possible. [#20282](https://github.com/ClickHouse/ClickHouse/pull/20282) ([alesapin](https://github.com/alesapin)).
-* Now all case-insensitive function names will be rewritten to their canonical representations. This is needed for projection query routing (the upcoming feature). [#20174](https://github.com/ClickHouse/ClickHouse/pull/20174) ([Amos Bird](https://github.com/amosbird)).
-* Fix creation of `TTL` in cases when its expression is a function and is the same as the `ORDER BY` key. Now it's allowed to set custom aggregation for primary key columns in `TTL` with `GROUP BY`. Backward incompatible: for primary key columns that are not in `GROUP BY` and aren't set explicitly, the function `any` is now applied instead of `max` when the TTL expires. Also, if you use TTL with `WHERE` or `GROUP BY`, you can see exceptions at merges while making a rolling update. [#15450](https://github.com/ClickHouse/ClickHouse/pull/15450) ([Anton Popov](https://github.com/CurtizJ)).
-
-#### New Feature
-
-* Add file engine settings: `engine_file_empty_if_not_exists` and `engine_file_truncate_on_insert`. [#20620](https://github.com/ClickHouse/ClickHouse/pull/20620) ([M0r64n](https://github.com/M0r64n)).
-* Add aggregate function `deltaSum` for summing the differences between consecutive rows. [#20057](https://github.com/ClickHouse/ClickHouse/pull/20057) ([Russ Frank](https://github.com/rf)).
-* New `event_time_microseconds` column in the `system.part_log` table. [#20027](https://github.com/ClickHouse/ClickHouse/pull/20027) ([Bharat Nallan](https://github.com/bharatnc)).
-* Added the `timezoneOffset(datetime)` function, which gives the offset from UTC in seconds. This closes [#19850](https://github.com/ClickHouse/ClickHouse/issues/19850). [#19962](https://github.com/ClickHouse/ClickHouse/pull/19962) ([keenwolf](https://github.com/keen-wolf)).
-* Add setting `insert_shard_id` to support inserting data into a specific shard from a distributed table. [#19961](https://github.com/ClickHouse/ClickHouse/pull/19961) ([flynn](https://github.com/ucasFL)).
-* Function `reinterpretAs` updated to support big integers. Fixes [#19691](https://github.com/ClickHouse/ClickHouse/issues/19691). [#19858](https://github.com/ClickHouse/ClickHouse/pull/19858) ([Maksim Kita](https://github.com/kitaisreal)).
-* Added Server Side Encryption Customer Keys (the `x-amz-server-side-encryption-customer-(key/md5)` header) support in the S3 client. See [the link](https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html). Closes [#19428](https://github.com/ClickHouse/ClickHouse/issues/19428). [#19748](https://github.com/ClickHouse/ClickHouse/pull/19748) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Added the `implicit_key` option for the `executable` dictionary source. It allows to avoid printing the key for every record if records come in the same order as the input keys. Implements [#14527](https://github.com/ClickHouse/ClickHouse/issues/14527). [#19677](https://github.com/ClickHouse/ClickHouse/pull/19677) ([Maksim Kita](https://github.com/kitaisreal)).
-* Add quota types `query_selects` and `query_inserts`. [#19603](https://github.com/ClickHouse/ClickHouse/pull/19603) ([JackyWoo](https://github.com/JackyWoo)).
-* Add function `extractTextFromHTML`. [#19600](https://github.com/ClickHouse/ClickHouse/pull/19600) ([zlx19950903](https://github.com/zlx19950903)), ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Tables with the `MergeTree*` engine now have two new table-level settings for query concurrency control. The setting `max_concurrent_queries` limits the number of concurrently executed queries which are related to this table. The setting `min_marks_to_honor_max_concurrent_queries` tells it to apply the previous setting only if the query reads at least this number of marks. [#19544](https://github.com/ClickHouse/ClickHouse/pull/19544) ([Amos Bird](https://github.com/amosbird)).
-* Added a `file` function to read a file from the user_files directory as a String. This is different from the `file` table function. This implements [#18851](https://github.com/ClickHouse/ClickHouse/issues/18851). [#19204](https://github.com/ClickHouse/ClickHouse/pull/19204) ([keenwolf](https://github.com/keen-wolf)).
-
-#### Experimental feature
-
-* Add experimental `Replicated` database engine. It replicates DDL queries across multiple hosts. [#16193](https://github.com/ClickHouse/ClickHouse/pull/16193) ([tavplubix](https://github.com/tavplubix)).
-* Introduce experimental support for window functions, enabled with `allow_experimental_window_functions = 1`. This is a preliminary, alpha-quality implementation that is not suitable for production use and will change in backward-incompatible ways in future releases. Please see [the documentation](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/sql-reference/window-functions/index.md#experimental-window-functions) for the list of supported features. [#20337](https://github.com/ClickHouse/ClickHouse/pull/20337) ([Alexander Kuzmenkov](https://github.com/akuzm)).
-* Add the ability to backup/restore metadata files for DiskS3. [#18377](https://github.com/ClickHouse/ClickHouse/pull/18377) ([Pavel Kovalenko](https://github.com/Jokser)).
-
-#### Performance Improvement
-
-* Hedged requests for remote queries. When the setting `use_hedged_requests` is enabled (off by default), allow establishing many connections with different replicas for a query. A new connection is opened in case the existing connection(s) with the replica(s) were not established within `hedged_connection_timeout` or no data was received within `receive_data_timeout`. The query uses the first connection which sends a non-empty progress packet (or a data packet, if `allow_changing_replica_until_first_data_packet`); other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported. [#19291](https://github.com/ClickHouse/ClickHouse/pull/19291) ([Kruglov Pavel](https://github.com/Avogar)). This allows to significantly reduce tail latencies on very large clusters.
-* Added support for `PREWHERE` (and enabled the corresponding optimization) when tables have row-level security expressions specified. [#19576](https://github.com/ClickHouse/ClickHouse/pull/19576) ([Denis Glazachev](https://github.com/traceon)).
-* The setting `distributed_aggregation_memory_efficient` is enabled by default. It will lower memory usage and improve performance of distributed queries. [#20599](https://github.com/ClickHouse/ClickHouse/pull/20599) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improve performance of `GROUP BY` with multiple fixed size keys. [#20472](https://github.com/ClickHouse/ClickHouse/pull/20472) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Improve performance of aggregate functions by more strict aliasing. [#19946](https://github.com/ClickHouse/ClickHouse/pull/19946) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Speed up reading from `Memory` tables in extreme cases (when the reading speed is in the order of 50 GB/sec) by simplification of the pipeline and (consequently) less lock contention in pipeline scheduling. [#20468](https://github.com/ClickHouse/ClickHouse/pull/20468) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Partially reimplement the HTTP server to make fewer copies of incoming and outgoing data. It gives up to 1.5x performance improvement on inserting long records over HTTP. [#19516](https://github.com/ClickHouse/ClickHouse/pull/19516) ([Ivan](https://github.com/abyss7)).
-* Add a `compress` setting for `Memory` tables. If it's enabled, the table will use less RAM. On some machines and datasets it can also work faster on SELECT, but it is not always the case. This closes [#20093](https://github.com/ClickHouse/ClickHouse/issues/20093). Note: there are reasons why Memory tables can work slower than MergeTree: (1) lack of compression (2) static size of blocks (3) lack of indices and prewhere... [#20168](https://github.com/ClickHouse/ClickHouse/pull/20168) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Slightly better code in aggregation. [#20978](https://github.com/ClickHouse/ClickHouse/pull/20978) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add back `intDiv`/`modulo` specializations for better performance. This fixes [#21293](https://github.com/ClickHouse/ClickHouse/issues/21293). The regression was introduced in https://github.com/ClickHouse/ClickHouse/pull/18145. [#21307](https://github.com/ClickHouse/ClickHouse/pull/21307) ([Amos Bird](https://github.com/amosbird)).
-* Do not squash blocks too much on INSERT SELECT if inserting into a Memory table. In previous versions an inefficient data representation was created in the Memory table after INSERT SELECT. This closes [#13052](https://github.com/ClickHouse/ClickHouse/issues/13052). [#20169](https://github.com/ClickHouse/ClickHouse/pull/20169) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix at least one case when the DataType parser may have exponential complexity (found by fuzzer). This closes [#20096](https://github.com/ClickHouse/ClickHouse/issues/20096). [#20132](https://github.com/ClickHouse/ClickHouse/pull/20132) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Parallelize SELECT with FINAL for a single part with level > 0 when the `do_not_merge_across_partitions_select_final` setting is 1. [#19375](https://github.com/ClickHouse/ClickHouse/pull/19375) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fill only requested columns when querying `system.parts` and `system.parts_columns`. Closes [#19570](https://github.com/ClickHouse/ClickHouse/issues/19570). [#21035](https://github.com/ClickHouse/ClickHouse/pull/21035) ([Anmol Arora](https://github.com/anmolarora)).
-* Perform algebraic optimizations of arithmetic expressions inside the `avg` aggregate function. Closes [#20092](https://github.com/ClickHouse/ClickHouse/issues/20092). [#20183](https://github.com/ClickHouse/ClickHouse/pull/20183) ([flynn](https://github.com/ucasFL)).
-
-#### Improvement
-
-* Case-insensitive compression methods for table functions. Also fixed the LZMA compression method, which was checked in upper case. [#21416](https://github.com/ClickHouse/ClickHouse/pull/21416) ([Vladimir Chebotarev](https://github.com/excitoon)).
-* Add two settings to delay or throw an error during insertion when there are too many inactive parts. This is useful when the server fails to clean up parts quickly enough. [#20178](https://github.com/ClickHouse/ClickHouse/pull/20178) ([Amos Bird](https://github.com/amosbird)).
-* Provide better compatibility for MySQL clients: 1. mysql jdbc, 2. mycli. [#21367](https://github.com/ClickHouse/ClickHouse/pull/21367) ([Amos Bird](https://github.com/amosbird)).
-* Forbid dropping a column if it's referenced by a materialized view. Closes [#21164](https://github.com/ClickHouse/ClickHouse/issues/21164). [#21303](https://github.com/ClickHouse/ClickHouse/pull/21303) ([flynn](https://github.com/ucasFL)).
-* The MySQL dictionary source will now retry unexpected connection failures (Lost connection to MySQL server during query), which sometimes happen on SSL/TLS connections. [#21237](https://github.com/ClickHouse/ClickHouse/pull/21237) ([Alexander Kazakov](https://github.com/Akazz)).
-* Usability improvement: more consistent `DateTime64` parsing: recognize the case when a unix timestamp with subsecond resolution is specified as a scaled integer (like `1111111111222` instead of `1111111111.222`); see the example below. This closes [#13194](https://github.com/ClickHouse/ClickHouse/issues/13194). [#21053](https://github.com/ClickHouse/ClickHouse/pull/21053) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Do only merging of sorted blocks on the initiator with `distributed_group_by_no_merge`. [#20882](https://github.com/ClickHouse/ClickHouse/pull/20882) ([Azat Khuzhin](https://github.com/azat)).
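A small, hedged example of the more consistent `DateTime64` parsing described a few entries above; the exact rendering depends on the server timezone (UTC assumed here):

```sql
SELECT
    toDateTime64('1111111111222', 3) AS scaled_integer,
    toDateTime64('1111111111.222', 3) AS with_separator;
-- both are now parsed as the same moment: 2005-03-18 01:58:31.222
```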
-* When loading the config for a mysql source, ClickHouse will now randomize the list of replicas with the same priority to ensure round-robin logic of picking a mysql endpoint. This closes [#20629](https://github.com/ClickHouse/ClickHouse/issues/20629). [#20632](https://github.com/ClickHouse/ClickHouse/pull/20632) ([Alexander Kazakov](https://github.com/Akazz)).
-* Function 'reinterpretAs(x, Type)' renamed to 'reinterpret(x, Type)'. [#20611](https://github.com/ClickHouse/ClickHouse/pull/20611) ([Maksim Kita](https://github.com/kitaisreal)).
-* Support vhost for the RabbitMQ engine [#20576](https://github.com/ClickHouse/ClickHouse/issues/20576). [#20596](https://github.com/ClickHouse/ClickHouse/pull/20596) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Improved serialization for data types combined of Arrays and Tuples. Improved matching of enum data types to the protobuf enum type. Fixed serialization of the `Map` data type. Omitted values are now set by default. [#20506](https://github.com/ClickHouse/ClickHouse/pull/20506) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Fixed a race between execution of distributed DDL tasks and cleanup of the DDL queue. Now a DDL task cannot be removed from ZooKeeper if there are active workers. Fixes [#20016](https://github.com/ClickHouse/ClickHouse/issues/20016). [#20448](https://github.com/ClickHouse/ClickHouse/pull/20448) ([tavplubix](https://github.com/tavplubix)).
-* Make FQDN and other DNS-related functions work correctly in alpine images. [#20336](https://github.com/ClickHouse/ClickHouse/pull/20336) ([filimonov](https://github.com/filimonov)).
-* Do not allow early constant folding of explicitly forbidden functions. [#20303](https://github.com/ClickHouse/ClickHouse/pull/20303) ([Azat Khuzhin](https://github.com/azat)).
-* Implicit conversion from integer to Decimal type might succeed if the integer value does not fit into the Decimal type. Now it throws `ARGUMENT_OUT_OF_BOUND`. [#20232](https://github.com/ClickHouse/ClickHouse/pull/20232) ([tavplubix](https://github.com/tavplubix)).
-* Lockless `SYSTEM FLUSH DISTRIBUTED`. [#20215](https://github.com/ClickHouse/ClickHouse/pull/20215) ([Azat Khuzhin](https://github.com/azat)).
-* Normalize count(constant) and sum(1) to count(). This is needed for projection query routing. [#20175](https://github.com/ClickHouse/ClickHouse/pull/20175) ([Amos Bird](https://github.com/amosbird)).
-* Support all native integer types in bitmap functions. [#20171](https://github.com/ClickHouse/ClickHouse/pull/20171) ([Amos Bird](https://github.com/amosbird)).
-* Updated `CacheDictionary`, `ComplexCacheDictionary`, `SSDCacheDictionary`, `SSDComplexKeyDictionary` to use LRUHashMap as the underlying index. [#20164](https://github.com/ClickHouse/ClickHouse/pull/20164) ([Maksim Kita](https://github.com/kitaisreal)).
-* The setting `access_management` is now configurable on startup by providing `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT`; defaults to disabled (`0`), which was the prior value. [#20139](https://github.com/ClickHouse/ClickHouse/pull/20139) ([Marquitos](https://github.com/sonirico)).
-* Fix toDateTime64(toDate()/toDateTime()) for DateTime64 - implement DateTime64 clamping to match DateTime behaviour. [#20131](https://github.com/ClickHouse/ClickHouse/pull/20131) ([Azat Khuzhin](https://github.com/azat)).
-* Quota improvements: SHOW TABLES is now considered as one query in the quota calculations, not two queries. SYSTEM queries now consume quota. Fixed calculation of an interval's end in quota consumption. [#20106](https://github.com/ClickHouse/ClickHouse/pull/20106) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Support `path IN (set)` expressions for the `system.zookeeper` table. [#20105](https://github.com/ClickHouse/ClickHouse/pull/20105) ([小路](https://github.com/nicelulu)).
-* Show full details of `MaterializeMySQL` tables in `system.tables`. [#20051](https://github.com/ClickHouse/ClickHouse/pull/20051) ([Stig Bakken](https://github.com/stigsb)).
-* Fix a data race in the executable dictionary that was possible only on misuse (when the script returns data ignoring its input). [#20045](https://github.com/ClickHouse/ClickHouse/pull/20045) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* The value of the MYSQL_OPT_RECONNECT option can now be controlled by the "opt_reconnect" parameter in the config section of the mysql replica. [#19998](https://github.com/ClickHouse/ClickHouse/pull/19998) ([Alexander Kazakov](https://github.com/Akazz)).
-* If the user calls the `JSONExtract` function with the `Float32` type requested, allow an inaccurate conversion to the result type. For example the number `0.1` in JSON is double precision and is not representable in Float32, but the user still wants to get it. Previous versions returned 0 for a non-Nullable type and NULL for a Nullable type to indicate that the conversion is imprecise. The logic was 100% correct, but it was surprising to users and led to questions. This closes [#13962](https://github.com/ClickHouse/ClickHouse/issues/13962). [#19960](https://github.com/ClickHouse/ClickHouse/pull/19960) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Add conversion of the block structure for INSERT into Distributed tables if it does not match. [#19947](https://github.com/ClickHouse/ClickHouse/pull/19947) ([Azat Khuzhin](https://github.com/azat)).
-* Improvement for the `system.distributed_ddl_queue` table. Initialize MaxDDLEntryID to the last value after restarting. Before this PR, MaxDDLEntryID would remain zero until a new DDLTask is processed. [#19924](https://github.com/ClickHouse/ClickHouse/pull/19924) ([Amos Bird](https://github.com/amosbird)).
-* Show `MaterializeMySQL` tables in `system.parts`. [#19770](https://github.com/ClickHouse/ClickHouse/pull/19770) ([Stig Bakken](https://github.com/stigsb)).
-* Add a separate config directive for the `Buffer` profile. [#19721](https://github.com/ClickHouse/ClickHouse/pull/19721) ([Azat Khuzhin](https://github.com/azat)).
-* Move conditions that are not related to JOIN to the WHERE clause. [#18720](https://github.com/ClickHouse/ClickHouse/issues/18720). [#19685](https://github.com/ClickHouse/ClickHouse/pull/19685) ([hexiaoting](https://github.com/hexiaoting)).
-* Add the ability to throttle INSERT into Distributed based on the amount of pending bytes for async send (`bytes_to_delay_insert`/`max_delay_to_insert` and `bytes_to_throw_insert` settings for the `Distributed` engine have been added). [#19673](https://github.com/ClickHouse/ClickHouse/pull/19673) ([Azat Khuzhin](https://github.com/azat)).
-* Fix some rare cases when write errors could be ignored in destructors. [#19451](https://github.com/ClickHouse/ClickHouse/pull/19451) ([Azat Khuzhin](https://github.com/azat)).
-* Print inline frames in stack traces for fatal errors. [#19317](https://github.com/ClickHouse/ClickHouse/pull/19317) ([Ivan](https://github.com/abyss7)).
-
-#### Bug Fix
-
-* Fix redundant reconnects to ZooKeeper and the possibility of two active sessions for a single ClickHouse server. Both problems were introduced in #14678. [#21264](https://github.com/ClickHouse/ClickHouse/pull/21264) ([alesapin](https://github.com/alesapin)).
-* Fix error `Bad cast from type ... to DB::ColumnLowCardinality` while inserting into a table with a `LowCardinality` column from the `Values` format. Fixes #21140. [#21357](https://github.com/ClickHouse/ClickHouse/pull/21357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix a deadlock in `ALTER DELETE` mutations for non-replicated MergeTree table engines when the predicate contains the table itself. Fixes [#20558](https://github.com/ClickHouse/ClickHouse/issues/20558). [#21477](https://github.com/ClickHouse/ClickHouse/pull/21477) ([alesapin](https://github.com/alesapin)).
-* Fix SIGSEGV for distributed queries on failures. [#21434](https://github.com/ClickHouse/ClickHouse/pull/21434) ([Azat Khuzhin](https://github.com/azat)).
-* Now `ALTER MODIFY COLUMN` queries will correctly affect changes in the partition key, skip indices, TTLs, and so on. Fixes [#13675](https://github.com/ClickHouse/ClickHouse/issues/13675). [#21334](https://github.com/ClickHouse/ClickHouse/pull/21334) ([alesapin](https://github.com/alesapin)).
-* Fix a bug with `join_use_nulls` and joining `TOTALS` from subqueries. This closes [#19362](https://github.com/ClickHouse/ClickHouse/issues/19362) and [#21137](https://github.com/ClickHouse/ClickHouse/issues/21137). [#21248](https://github.com/ClickHouse/ClickHouse/pull/21248) ([vdimir](https://github.com/vdimir)).
-* Fix crash in `EXPLAIN` for a query with `UNION`. Fixes [#20876](https://github.com/ClickHouse/ClickHouse/issues/20876), [#21170](https://github.com/ClickHouse/ClickHouse/issues/21170). [#21246](https://github.com/ClickHouse/ClickHouse/pull/21246) ([flynn](https://github.com/ucasFL)).
-* Now mutations are allowed only for table engines that support them (the MergeTree family, Memory, MaterializedView). Other engines will report a clearer error. Fixes [#21168](https://github.com/ClickHouse/ClickHouse/issues/21168). [#21183](https://github.com/ClickHouse/ClickHouse/pull/21183) ([alesapin](https://github.com/alesapin)).
-* Fixes [#21112](https://github.com/ClickHouse/ClickHouse/issues/21112). Fixed a bug that could cause duplicates with an insert query (if one of the callbacks came a little too late). [#21138](https://github.com/ClickHouse/ClickHouse/pull/21138) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix `input_format_null_as_default` to take effect when types are nullable. This fixes [#21116](https://github.com/ClickHouse/ClickHouse/issues/21116). [#21121](https://github.com/ClickHouse/ClickHouse/pull/21121) ([Amos Bird](https://github.com/amosbird)).
-* Fix a bug related to casting Tuple to Map. Closes [#21029](https://github.com/ClickHouse/ClickHouse/issues/21029). [#21120](https://github.com/ClickHouse/ClickHouse/pull/21120) ([hexiaoting](https://github.com/hexiaoting)).
-* Fix the metadata leak when a Replicated*MergeTree table with a custom (non-default) ZooKeeper cluster is dropped. [#21119](https://github.com/ClickHouse/ClickHouse/pull/21119) ([fastio](https://github.com/fastio)).
-* Fix a type mismatch issue when using LowCardinality keys in joinGet. This fixes [#21114](https://github.com/ClickHouse/ClickHouse/issues/21114). [#21117](https://github.com/ClickHouse/ClickHouse/pull/21117) ([Amos Bird](https://github.com/amosbird)).
-* Fix default_replica_path and default_replica_name values being useless on the Replicated(*)MergeTree engine when the engine needs other parameters specified. [#21060](https://github.com/ClickHouse/ClickHouse/pull/21060) ([mxzlxy](https://github.com/mxzlxy)).
[#21060](https://github.com/ClickHouse/ClickHouse/pull/21060) ([mxzlxy](https://github.com/mxzlxy)). -* Out-of-bound memory access was possible when formatting a specifically crafted out-of-range value of type `DateTime64`. This closes [#20494](https://github.com/ClickHouse/ClickHouse/issues/20494). This closes [#20543](https://github.com/ClickHouse/ClickHouse/issues/20543). [#21023](https://github.com/ClickHouse/ClickHouse/pull/21023) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Block parallel insertions into storage join. [#21009](https://github.com/ClickHouse/ClickHouse/pull/21009) ([vdimir](https://github.com/vdimir)). -* Fixed behaviour where `ALTER MODIFY COLUMN` created a mutation that would knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)). -* Closes [#9969](https://github.com/ClickHouse/ClickHouse/issues/9969). Fixed a Brotli HTTP compression error, which was reproduced with large data sizes, a slightly complicated structure, and JSON output format. Update Brotli to the latest version to include the "fix rare access to uninitialized data in ring-buffer". [#20991](https://github.com/ClickHouse/ClickHouse/pull/20991) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix 'Empty task was returned from async task queue' on query cancellation. [#20881](https://github.com/ClickHouse/ClickHouse/pull/20881) ([Azat Khuzhin](https://github.com/azat)). -* The `USE database;` query did not work when using a MySQL 5.7 client to connect to the ClickHouse server; it's fixed. Fixes [#18926](https://github.com/ClickHouse/ClickHouse/issues/18926). [#20878](https://github.com/ClickHouse/ClickHouse/pull/20878) ([tavplubix](https://github.com/tavplubix)). -* Fix usage of the `-Distinct` combinator with the `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)). -* Fix subquery with union distinct and limit clause. Closes [#20597](https://github.com/ClickHouse/ClickHouse/issues/20597). [#20610](https://github.com/ClickHouse/ClickHouse/pull/20610) ([flynn](https://github.com/ucasFL)). -* Fixed inconsistent behavior of a dictionary for queries that look up absent keys in the dictionary. [#20578](https://github.com/ClickHouse/ClickHouse/pull/20578) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix the number of threads for scalar subqueries and subqueries for index (after [#19007](https://github.com/ClickHouse/ClickHouse/issues/19007) a single thread was always used). Fixes [#20457](https://github.com/ClickHouse/ClickHouse/issues/20457), [#20512](https://github.com/ClickHouse/ClickHouse/issues/20512). [#20550](https://github.com/ClickHouse/ClickHouse/pull/20550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix crash which could happen if an unknown packet was received from a remote query (was introduced in [#17868](https://github.com/ClickHouse/ClickHouse/issues/17868)). [#20547](https://github.com/ClickHouse/ClickHouse/pull/20547) ([Azat Khuzhin](https://github.com/azat)). -* Add proper checks while parsing directory names for async INSERT (fixes SIGSEGV). [#20498](https://github.com/ClickHouse/ClickHouse/pull/20498) ([Azat Khuzhin](https://github.com/azat)). -* Fix function `transform` not working properly for floating point keys. Closes [#20460](https://github.com/ClickHouse/ClickHouse/issues/20460). [#20479](https://github.com/ClickHouse/ClickHouse/pull/20479) ([flynn](https://github.com/ucasFL)).
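A minimal sketch of the `transform` fix above; the query and expected mapping are illustrative, using `system.numbers` so it is self-contained:

```sql
-- transform(x, from, to, default): floating point keys previously misbehaved.
-- number / 2 yields 0, 0.5, 1, 1.5 -> 'other', 'half', 'one', 'other'.
SELECT transform(number / 2, [0.5, 1.0], ['half', 'one'], 'other') AS label
FROM system.numbers
LIMIT 4;
```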
-* Fix infinite loop when propagating WITH aliases to subqueries. This fixes [#20388](https://github.com/ClickHouse/ClickHouse/issues/20388). [#20476](https://github.com/ClickHouse/ClickHouse/pull/20476) ([Amos Bird](https://github.com/amosbird)). -* Fix abnormal server termination when an HTTP client goes away. [#20464](https://github.com/ClickHouse/ClickHouse/pull/20464) ([Azat Khuzhin](https://github.com/azat)). -* Fix `LOGICAL_ERROR` for `join_use_nulls=1` when JOIN contains const from SELECT. [#20461](https://github.com/ClickHouse/ClickHouse/pull/20461) ([Azat Khuzhin](https://github.com/azat)). -* Check if table function `view` is used in an expression list and throw an error. This fixes [#20342](https://github.com/ClickHouse/ClickHouse/issues/20342). [#20350](https://github.com/ClickHouse/ClickHouse/pull/20350) ([Amos Bird](https://github.com/amosbird)). -* Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)). -* Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)). -* Fix incorrect result of binary operations between two constant decimals of different scale. Fixes [#20283](https://github.com/ClickHouse/ClickHouse/issues/20283). [#20339](https://github.com/ClickHouse/ClickHouse/pull/20339) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix too-frequent retries of failed background tasks for the `ReplicatedMergeTree` table engines family. This could lead to too-verbose logging and increased CPU load. Fixes [#20203](https://github.com/ClickHouse/ClickHouse/issues/20203). [#20335](https://github.com/ClickHouse/ClickHouse/pull/20335) ([alesapin](https://github.com/alesapin)). -* Restrict `DROP` or `RENAME` of the version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)). -* Fixed the behavior where, in case of broken JSON, we tried to read the whole file into memory, which led to an exception from the allocator. Fixes [#19719](https://github.com/ClickHouse/ClickHouse/issues/19719). [#20286](https://github.com/ClickHouse/ClickHouse/pull/20286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix exception during vertical merge for `MergeTree` family table engines that don't allow vertical merges. Fixes [#20259](https://github.com/ClickHouse/ClickHouse/issues/20259). [#20279](https://github.com/ClickHouse/ClickHouse/pull/20279) ([alesapin](https://github.com/alesapin)). -* Fix rare server crash on config reload during shutdown. Fixes [#19689](https://github.com/ClickHouse/ClickHouse/issues/19689). [#20224](https://github.com/ClickHouse/ClickHouse/pull/20224) ([alesapin](https://github.com/alesapin)). -* Fix CTE when used in INSERT SELECT. This fixes [#20187](https://github.com/ClickHouse/ClickHouse/issues/20187), fixes [#20195](https://github.com/ClickHouse/ClickHouse/issues/20195). [#20211](https://github.com/ClickHouse/ClickHouse/pull/20211) ([Amos Bird](https://github.com/amosbird)). -* Fixes [#19314](https://github.com/ClickHouse/ClickHouse/issues/19314). [#20156](https://github.com/ClickHouse/ClickHouse/pull/20156) ([Ivan](https://github.com/abyss7)). -* Fix the `toMinute` function to handle special time zones correctly. [#20149](https://github.com/ClickHouse/ClickHouse/pull/20149) ([keenwolf](https://github.com/keen-wolf)).
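To illustrate the `toMinute` fix above: time zones with non-whole-hour offsets (Asia/Kolkata is UTC+5:30) are the kind of "special" time zone that was mishandled; a hedged sketch:

```sql
-- The +5:30 offset must be accounted for; noon local time has minute 0.
SELECT toMinute(toDateTime('2021-02-07 12:00:00', 'Asia/Kolkata'));  -- expected: 0
```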
-* Fix server crash after a query with the `if` function with a `Tuple` type of then/else branches result, where the `Tuple` type contains an `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). -* The `MongoDB` table engine now establishes a connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)). -* Bugfix in StorageJoin. [#20079](https://github.com/ClickHouse/ClickHouse/pull/20079) ([vdimir](https://github.com/vdimir)). -* Fix the case where, when calculating the modulo of a negative number divided by a small divisor, the resulting data type was not large enough to accommodate the negative result. This closes [#20052](https://github.com/ClickHouse/ClickHouse/issues/20052). [#20067](https://github.com/ClickHouse/ClickHouse/pull/20067) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* MaterializeMySQL: Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). -* Prevent "Connection refused" in Docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)). -* `EmbeddedRocksDB` is an experimental storage. Fix the issue with the lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). -* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). -* A deadlock was possible if `system.text_log` is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix starting the server with tables having default expressions containing dictGet(). Allow getting the return type of dictGet() without loading the dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix a clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)). -* Fix a bug where moving pieces to the destination table could fail when multiple clickhouse-copiers were launched. [#19743](https://github.com/ClickHouse/ClickHouse/pull/19743) ([madianjun](https://github.com/mdianjun)). -* The background thread which executes `ON CLUSTER` queries might hang waiting for a dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). - -#### Build/Testing/Packaging Improvement - -* Allow building ClickHouse with AVX-2 enabled globally. It gives slight performance benefits on modern CPUs.
Not recommended for production and will not be supported as an official build for now. [#20180](https://github.com/ClickHouse/ClickHouse/pull/20180) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix some of the issues found by Coverity. See [#19964](https://github.com/ClickHouse/ClickHouse/issues/19964). [#20010](https://github.com/ClickHouse/ClickHouse/pull/20010) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow starting up with a modified binary under gdb. In previous versions, if you set a breakpoint in gdb before start, the server would refuse to start up due to a failed integrity check. [#21258](https://github.com/ClickHouse/ClickHouse/pull/21258) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add a test for different compression methods in Kafka. [#21111](https://github.com/ClickHouse/ClickHouse/pull/21111) ([filimonov](https://github.com/filimonov)). -* Fixed port clash from the test_storage_kerberized_hdfs test. [#19974](https://github.com/ClickHouse/ClickHouse/pull/19974) ([Ilya Yatsishin](https://github.com/qoega)). -* Print `stdout` and `stderr` to the log when failing to start Docker in integration tests. Before this PR there was a very short error message in this case, which didn't help to investigate the problems. [#20631](https://github.com/ClickHouse/ClickHouse/pull/20631) ([Vitaly Baranov](https://github.com/vitlibar)). - - -## ClickHouse release 21.2 - -### ClickHouse release v21.2.2.8-stable, 2021-02-07 - -#### Backward Incompatible Change - -* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do an explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)). -* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)). -* Fix memory tracking for `OPTIMIZE TABLE`/merges; account query memory limits and sampling for `OPTIMIZE TABLE`/merges. [#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)). -* Disallow a floating point column as a partition key, see [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421#event-4147046255). [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)). -* Excessive parentheses in type definitions are no longer supported, for example: `Array((UInt8))`. - -#### New Feature - -* Added `PostgreSQL` table engine (both select/insert, with support for multidimensional arrays), also as a table function. Added `PostgreSQL` dictionary source. Added `PostgreSQL` database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Data type `Nested` now supports arbitrary levels of nesting. Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, names of `Tuple` elements, which can be read without reading the whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)). -* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)). -* Adds a new table called `system.distributed_ddl_queue` that displays the queries in the DDL worker queue.
[#17656](https://github.com/ClickHouse/ClickHouse/pull/17656) ([Bharat Nallan](https://github.com/bharatnc)). -* Added support for mapping LDAP group names, and attribute values in general, to local roles for users from LDAP user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)). -* Support insert into table function `cluster`, and for both table functions `remote` and `cluster`, support distributing data across nodes by specifying a sharding key. Closes [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasFL)). -* Add function `decodeXMLComponent` to decode characters for XML. Example: `SELECT decodeXMLComponent('Hello,"world"!')` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). [#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)). -* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. [#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)). -* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasFL)). -* Add information about used features (functions, table engines, etc) into system.query_log. [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Function `formatDateTime` supports the `%Q` modifier to format a date to its quarter. [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Support MetaKey+Enter hotkey binding in play UI. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)). -* Add three functions for the map data type: 1. `mapContains(map, key)` to check whether the map's keys include the second parameter `key`; 2. `mapKeys(map)` returns all the keys in Array format; 3. `mapValues(map)` returns all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)). -* Add `log_comment` setting related to [#18494](https://github.com/ClickHouse/ClickHouse/issues/18494). [#18549](https://github.com/ClickHouse/ClickHouse/pull/18549) ([Zijie Lu](https://github.com/TszKitLo40)). -* Add support for a tuple argument to the `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)). -* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)). -* Add `SELECT ALL` syntax. Closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasFL)). - -#### Performance Improvement - -* Faster parts removal by lowering the number of `stat` syscalls. This returns an optimization that existed a while ago. A safer interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Aliases declared in a `WITH` statement are properly used in index analysis. Queries like `WITH column AS alias SELECT ...
WHERE alias = ...` may now use the index. [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896) ([Amos Bird](https://github.com/amosbird)). -* Add `optimize_alias_column_prediction` (on by default), which will: respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes; respect aliased columns in WHERE for trivial count queries for optimize_trivial_count; respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)). -* Speed up the aggregate function `sum`. The improvement is only visible in synthetic benchmarks and is not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Update libc++ and use another ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Danila Kutenin](https://github.com/danlark1)). -* Rewrite the `sumIf()` and `sum(if())` functions to the `countIf()` function when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasFL)). -* Use a connection pool for S3 connections, controlled by the `s3_max_connections` setting. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Add support for the zstd long option for better compression of string columns to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)). -* Slightly improve server latency by removing access to configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Reduce lock contention for multiple layers of the `Buffer` engine. [#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)). -* Support splitting the `Filter` step of a query plan into an `Expression + Filter` pair. Together with the `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution of some expressions until after the `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - -#### Improvement - -* `SELECT count() FROM table` can now be executed if at least one column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). -* Set charset to `utf8mb4` when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* The `S3` table function now supports the `auto` compression mode (autodetect). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). [#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Correctly output infinite arguments for the `formatReadableTimeDelta` function. In previous versions, there was an implicit conversion to an implementation-specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([alexey-milovidov](https://github.com/alexey-milovidov)).
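The `sumIf()`/`sum(if())` to `countIf()` rewrite listed above fires when the expression is logically a 0/1 count; a sketch of two equivalent forms (the even-number predicate is illustrative):

```sql
-- Both expressions count even numbers and may be rewritten internally to
-- countIf(number % 2 = 0).
SELECT sumIf(1, number % 2 = 0) AS a,
       sum(if(number % 2 = 0, 1, 0)) AS b
FROM numbers(10);  -- a = b = 5
```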
-* Table function `S3` will use the global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)). -* In distributed queries, if the setting `async_socket_for_remote` is enabled, it was possible to get a stack overflow (at least in the debug build configuration) if a very deeply nested data type is used in a table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces a minor backward incompatibility: excessive parentheses in type definitions are no longer supported, for example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add a separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)). -* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. [#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)). -* Dictionary: better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)). -* Add an option to disable validation of checksums on reading. Should never be used in production. Please do not expect any benefits in disabling it. It may only be used for experiments and benchmarks. The setting is only applicable for tables of the MergeTree family. Checksums are always validated for other table engines and when receiving data over the network. In my observations there is no performance difference, or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support a constant result in function `multiIf`. [#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)). -* Enable the functions length/empty/notEmpty for the Map data type; they return the number of keys in the Map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([taiyang-li](https://github.com/taiyang-li)). -* Add a `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support using the new location of the `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)). -* The `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)). -* Add an `http_referer` field to `system.query_log`, `system.processes`, etc. This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Improve MySQL compatibility by making more functions case-insensitive and adding aliases. [#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)).
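A small illustration of the `toIPv6` entry above; the IPv4-mapped `::ffff:...` rendering shown in the comment is an assumption about the output format, not a guarantee:

```sql
-- toIPv6 now accepts IPv4 literals, mapping them into the IPv6 address space.
SELECT toIPv6('127.0.0.1');  -- presumably rendered as ::ffff:127.0.0.1
```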
-* Add metrics for MergeTree parts (Wide/Compact/InMemory) types. [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)). -* Allow Docker to be executed with an arbitrary uid. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)). -* Fix wrong alignment of values of the `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow changing `max_server_memory_usage` without a restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* The exception when function `bar` is called with a certain NaN argument may be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Explicitly set the uid / gid of the clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). -* Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([tavplubix](https://github.com/tavplubix)). -* Docker image: several improvements for the clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)). -* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?`. This helps better analyze complex query logs. [#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)). -* Check the per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading); this avoids the INSERT getting stuck on the receiver (on a truncated .bin file on the sender). Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account; now this information is included in the header, and backward compatibility is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)). -* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions an exception about the `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)). -* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Add [UInt8, UInt16, UInt32, UInt64] argument types support for the bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)). -* Allow CTE (Common Table Expressions) to be further aliased.
Propagate CSE (Common Subexpressions Elimination) to subqueries at the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378). This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235. [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)). -* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)). -* In case of unexpected exceptions, automatically restart the background thread that is responsible for the execution of distributed DDL queries. Fixes [#17991](https://github.com/ClickHouse/ClickHouse/issues/17991). [#18285](https://github.com/ClickHouse/ClickHouse/pull/18285) ([徐炘](https://github.com/weeds085490)). -* Updated the AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Added support for the `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables. [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)). -* Restrict `MODIFY TTL` queries for `MergeTree` tables created with the old syntax. Previously, the query succeeded but actually had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). - -#### Bug Fix - -* Fix index analysis of binary functions with a constant argument, which led to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). -* Fix starting the server with tables having default expressions containing dictGet(). Allow getting the return type of dictGet() without loading the dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix server crash after a query with the `if` function with a `Tuple` type of then/else branches result, where the `Tuple` type contains an `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). -* `MaterializeMySQL` (experimental feature): Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). -* Prevent "Connection refused" in Docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)). -* `EmbeddedRocksDB` is an experimental storage. Fix the issue with the lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). -* The function `greatCircleAngle` returned inaccurate results in previous versions.
This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). [#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix a rare bug where some replicated operations (like mutation) could not process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). -* The background thread which executes `ON CLUSTER` queries might hang waiting for a dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). -* Fix wrong deserialization of columns description. It made INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Mark a distributed batch as broken in case of an empty data block in one of the files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). -* Fixed a very rare bug that might cause a mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([tavplubix](https://github.com/tavplubix)). -* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([vdimir](https://github.com/vdimir)). -* Do not mark a file for distributed send as broken on EOF. [#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)). -* Fix leaking of a pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)). -* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix an issue in the merge tree data writer which could lead to marks with a bigger size than the fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). -* Fix a startup bug when clickhouse was not able to read the compression codec from `LowCardinality(Nullable(...))` and threw the exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). -* Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length. Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). [#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
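The simplified `tupleHammingDistance` above now supports tuples of any equal length; a minimal sketch:

```sql
-- Exactly one position (the second) differs, so the Hamming distance is 1.
SELECT tupleHammingDistance((1, 2, 3), (1, 0, 3));
```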
-* Make sure `groupUniqArray` returns the correct type for an argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix possible error `Expected single dictionary argument for function` when using function `ignore` with a `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix inserting of a `LowCardinality` column into a table with the `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a minor issue in JOIN: Join tries to materialize const columns, but our code expects them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Disable `optimize_move_functions_out_of_any` because the optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of the query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed a very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)). -* Fixed rare crashes when the server ran out of memory. [#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([tavplubix](https://github.com/tavplubix)). -* Fix incorrect behavior where an `ALTER TABLE ... DROP PART 'part_name'` query removed all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). -* Fixed issue [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894). Added a check to avoid an exception when a long column alias ('table.column' style, usually auto-generated by BI tools like Looker) equals a long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)). -* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a bug where a mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')`) was serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). -* ATTACH PARTITION will reset mutations. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)).
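A hedged sketch of the `groupUniqArray`-with-Enum fix at the top of this group; the table `t` and its values are hypothetical:

```sql
CREATE TABLE t (e Enum8('a' = 1, 'b' = 2)) ENGINE = Memory;
INSERT INTO t VALUES ('a'), ('b'), ('a');
-- The result type now matches the argument: Array(Enum8('a' = 1, 'b' = 2)).
SELECT groupUniqArray(e) FROM t;
```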
-* Fix issue with `bitmapOrCardinality` that could lead to a nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)). -* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse a decimal from a nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). -* Fix a data type conversion issue for the MySQL engine. [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). -* Fix a clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)). - - -#### Build/Testing/Packaging Improvement - -* Run [SQLancer](https://twitter.com/RiggerManuel/status/1352345625480884228) (logical SQL fuzzer) in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)). -* The Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). [#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add integration tests run with MSan. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)). -* Fixed MemorySanitizer errors in cyrus-sasl and musl. [#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)). -* An insufficient arguments check in the `positionCaseInsensitiveUTF8` function triggered the address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Remove --project-directory for docker-compose in integration tests. Fix log formatting from the Docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)). -* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. The dicttoxml project has not been active for 5+ years. [#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)). -* Allow explicitly enabling or disabling the watchdog via the environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if the server is not attached to a terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)). -* Allow building librdkafka without SSL. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)). -* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)).
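To illustrate the `CAST` fix above: `nullIf('x', 'x')` is just one way to produce a NULL `Nullable(String)` value; a sketch:

```sql
-- Previously this failed with 'Attempt to read after eof'; now it returns NULL.
SELECT CAST(nullIf('x', 'x') AS Nullable(Decimal(10, 2)));
```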
-* Fix potential nullptr dereference in table function `VALUES`. [#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Avoid UBSan reports in the `arrayElement` function, `substring` and `arraySum`. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([alexey-milovidov](https://github.com/alexey-milovidov)). - - -## ClickHouse release 21.1 - -### ClickHouse release v21.1.3.32-stable, 2021-02-03 - -#### Bug Fix - -* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix crash when pushing down predicates to a union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)). -* Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)). -* In previous versions, unusual arguments for the function arrayEnumerateUniq could cause a crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fixed stack overflow when using accurate comparison of an arithmetic type with a string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)). -* Fix crash when a nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a segmentation fault in the `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). -* Some functions with big integers could cause a segfault. Big integers are an experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix wrong result of the function `neighbor` for a `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). -* A `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang; it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)). -* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)).
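A quick self-contained check for the UInt8-greater-than-127 fix listed above, assuming `numbers()` as the data source:

```sql
-- Values 128..255 satisfy the predicate, so the expected count is 128.
SELECT count()
FROM (SELECT toUInt8(number) AS x FROM numbers(256))
WHERE x > 127;
```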
-* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). -* A buffer overflow (on memory read) was possible if the `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* An uninitialized memory read was possible in the encrypt/decrypt functions if an empty string was passed as the IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix possible buffer overflow in the Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix the `system.parts` `_state` column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). -* Fixed possible wrong result or segfault on aggregation when a Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([tavplubix](https://github.com/tavplubix)). -* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a bug where concurrent `ALTER` and `DROP` queries might hang while processing a ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). -* Fixed `There is no checkpoint` error when inserting data through the HTTP interface using the `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)). -* Disable constant folding for subqueries at the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). -* A mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). - - - -### ClickHouse release v21.1.2.15-stable 2021-01-18 - -#### Backward Incompatible Change - -* The setting `input_format_null_as_default` is enabled by default. [#17525](https://github.com/ClickHouse/ClickHouse/pull/17525) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Check settings constraints for profile settings from config. The server will fail to start if users.xml contains settings that do not meet corresponding constraints.
[#18486](https://github.com/ClickHouse/ClickHouse/pull/18486) ([tavplubix](https://github.com/tavplubix)). -* Restrict `ALTER MODIFY SETTING` from changing storage settings that affect data parts (`write_final_mark` and `enable_mixed_granularity_parts`). [#18306](https://github.com/ClickHouse/ClickHouse/pull/18306) ([Amos Bird](https://github.com/amosbird)). -* Set `insert_quorum_parallel` to 1 by default. It is significantly more convenient to use than "sequential" quorum inserts. But if you rely on sequential consistency, you should set the setting back to zero. [#17567](https://github.com/ClickHouse/ClickHouse/pull/17567) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Remove the `sumburConsistentHash` function. This closes [#18120](https://github.com/ClickHouse/ClickHouse/issues/18120). [#18656](https://github.com/ClickHouse/ClickHouse/pull/18656) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Removed aggregate functions `timeSeriesGroupSum`, `timeSeriesGroupRateSum` because a friend of mine said they never worked. This fixes [#16869](https://github.com/ClickHouse/ClickHouse/issues/16869). If you have luck using these functions, write an email to clickhouse-feedback@yandex-team.com. [#17423](https://github.com/ClickHouse/ClickHouse/pull/17423) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Prohibit toUnixTimestamp(Date()) (before, it just returned the UInt16 representation of the Date). [#17376](https://github.com/ClickHouse/ClickHouse/pull/17376) ([Azat Khuzhin](https://github.com/azat)). -* Allow using extended integer types (`Int128`, `Int256`, `UInt256`) in the `avg` and `avgWeighted` functions. Also allow using different types (integer, decimal, floating point) for the value and for the weight in the `avgWeighted` function. This is a backward-incompatible change: now the `avg` and `avgWeighted` functions always return `Float64` (as documented). Before this change the return type for `Decimal` arguments was also `Decimal`. [#15419](https://github.com/ClickHouse/ClickHouse/pull/15419) ([Mike](https://github.com/myrrc)). -* Expression `toUUID(N)` no longer works. Replace with `toUUID('00000000-0000-0000-0000-000000000000')`. This change is motivated by non-obvious results of `toUUID(N)` where N is non-zero. -* SSL Certificates with incorrect "key usage" are rejected. In previous versions they used to work. See [#19262](https://github.com/ClickHouse/ClickHouse/issues/19262). -* `incl` references to the substitutions file (`/etc/metrika.xml`) were removed from the default config (``, ``, ``, ``, ``). If you were using the substitutions file and were relying on those implicit references, you should put them back manually and explicitly by adding corresponding sections with `incl="..."` attributes before the update. See [#18740](https://github.com/ClickHouse/ClickHouse/pull/18740) ([alexey-milovidov](https://github.com/alexey-milovidov)). - -#### New Feature - -* Implement the gRPC protocol in ClickHouse. [#15111](https://github.com/ClickHouse/ClickHouse/pull/15111) ([Vitaly Baranov](https://github.com/vitlibar)). -* Allow using multiple ZooKeeper clusters. [#17070](https://github.com/ClickHouse/ClickHouse/pull/17070) ([fastio](https://github.com/fastio)). -* Implemented `REPLACE TABLE` and `CREATE OR REPLACE TABLE` queries. [#18521](https://github.com/ClickHouse/ClickHouse/pull/18521) ([tavplubix](https://github.com/tavplubix)). -* Implement `UNION DISTINCT` and treat the plain `UNION` clause as `UNION DISTINCT` by default.
Add a setting `union_default_mode` that allows treating it as `UNION ALL` or requiring explicit mode specification. [#16338](https://github.com/ClickHouse/ClickHouse/pull/16338) ([flynn](https://github.com/ucasFL)). -* Added function `accurateCastOrNull`. This closes [#10290](https://github.com/ClickHouse/ClickHouse/issues/10290). Add type conversions in `x IN (subquery)` expressions. This closes [#10266](https://github.com/ClickHouse/ClickHouse/issues/10266). [#16724](https://github.com/ClickHouse/ClickHouse/pull/16724) ([Maksim Kita](https://github.com/kitaisreal)). -* IP Dictionary supports `IPv4` / `IPv6` types directly. [#17571](https://github.com/ClickHouse/ClickHouse/pull/17571) ([vdimir](https://github.com/vdimir)). -* IP Dictionary supports key fetching. Resolves [#18241](https://github.com/ClickHouse/ClickHouse/issues/18241). [#18480](https://github.com/ClickHouse/ClickHouse/pull/18480) ([vdimir](https://github.com/vdimir)). -* Add `*.zst` compression/decompression support for data import and export. It enables using `*.zst` in the `file()` function and `Content-encoding: zstd` in the HTTP client. This closes [#16791](https://github.com/ClickHouse/ClickHouse/issues/16791). [#17144](https://github.com/ClickHouse/ClickHouse/pull/17144) ([Abi Palagashvili](https://github.com/fibersel)). -* Added `mannWhitneyUTest`, `studentTTest` and `welchTTest` aggregate functions. Refactored `rankCorr` a bit. [#16883](https://github.com/ClickHouse/ClickHouse/pull/16883) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add functions `countMatches`/`countMatchesCaseInsensitive`. [#17459](https://github.com/ClickHouse/ClickHouse/pull/17459) ([Azat Khuzhin](https://github.com/azat)). -* Implement `countSubstrings()`/`countSubstringsCaseInsensitive()`/`countSubstringsCaseInsensitiveUTF8()` (count the number of substring occurrences). [#17347](https://github.com/ClickHouse/ClickHouse/pull/17347) ([Azat Khuzhin](https://github.com/azat)). -* Add information about used databases, tables and columns in system.query_log. Add `query_kind` and `normalized_query_hash` fields. [#17726](https://github.com/ClickHouse/ClickHouse/pull/17726) ([Amos Bird](https://github.com/amosbird)). -* Add a setting `optimize_on_insert`. When enabled, do the same transformation for an INSERTed block of data as if a merge was done on this block (e.g. Replacing, Collapsing, Aggregating...). This setting is enabled by default. This can influence Materialized View and MaterializeMySQL behaviour (see detailed description). This closes [#10683](https://github.com/ClickHouse/ClickHouse/issues/10683). [#16954](https://github.com/ClickHouse/ClickHouse/pull/16954) ([Kruglov Pavel](https://github.com/Avogar)). -* Kerberos Authentication for HDFS. [#16621](https://github.com/ClickHouse/ClickHouse/pull/16621) ([Ilya Golshtein](https://github.com/ilejn)). -* Support the `SHOW SETTINGS` statement to show parameters in system.settings. The `SHOW CHANGED SETTINGS` and `LIKE/ILIKE` clauses are also supported. [#18056](https://github.com/ClickHouse/ClickHouse/pull/18056) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Function `position` now supports the `POSITION(needle IN haystack)` syntax for SQL compatibility. This closes [#18701](https://github.com/ClickHouse/ClickHouse/issues/18701). ... [#18779](https://github.com/ClickHouse/ClickHouse/pull/18779) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Now we have a new storage setting `max_partitions_to_read` for tables in the MergeTree family.
It limits the max number of partitions that can be accessed in one query. A user setting `force_max_partition_limit` is also added to enforce this constraint. [#18712](https://github.com/ClickHouse/ClickHouse/pull/18712) ([Amos Bird](https://github.com/amosbird)). -* Add a `query_id` column to `system.part_log` for inserted parts. Closes [#10097](https://github.com/ClickHouse/ClickHouse/issues/10097). [#18644](https://github.com/ClickHouse/ClickHouse/pull/18644) ([flynn](https://github.com/ucasFL)). -* Allow CREATE TABLE AS SELECT with a columns specification. Example: `CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;`. [#18060](https://github.com/ClickHouse/ClickHouse/pull/18060) ([Maksim Kita](https://github.com/kitaisreal)). -* Added `arrayMin`, `arrayMax`, `arrayAvg` aggregation functions. [#18032](https://github.com/ClickHouse/ClickHouse/pull/18032) ([Maksim Kita](https://github.com/kitaisreal)). -* Implemented the `ATTACH TABLE name FROM 'path/to/data/' (col1 Type1, ...` query. It creates a new table with the provided structure and attaches table data from the provided directory in `user_files`. [#17903](https://github.com/ClickHouse/ClickHouse/pull/17903) ([tavplubix](https://github.com/tavplubix)). -* Add mutation support for StorageMemory. This closes [#9117](https://github.com/ClickHouse/ClickHouse/issues/9117). [#15127](https://github.com/ClickHouse/ClickHouse/pull/15127) ([flynn](https://github.com/ucasFL)). -* Support the syntax `EXISTS DATABASE name`. [#18458](https://github.com/ClickHouse/ClickHouse/pull/18458) ([Du Chuan](https://github.com/spongedu)). -* Support builtin functions `isIPv4String` and `isIPv6String`, like [MySQL](https://github.com/ClickHouse/ClickHouse/compare/master...spongedu:support_is_ipv4?expand=1). [#18349](https://github.com/ClickHouse/ClickHouse/pull/18349) ([Du Chuan](https://github.com/spongedu)). -* Add a new setting `insert_distributed_one_random_shard = 1` to allow insertion into a multi-sharded distributed table without any distributed key. [#18294](https://github.com/ClickHouse/ClickHouse/pull/18294) ([Amos Bird](https://github.com/amosbird)). -* Add settings `min_compress_block_size` and `max_compress_block_size` to MergeTreeSettings, which have higher priority than the global settings and take effect when they are set. Closes [#13890](https://github.com/ClickHouse/ClickHouse/issues/13890). [#17867](https://github.com/ClickHouse/ClickHouse/pull/17867) ([flynn](https://github.com/ucasFL)). -* Add support for 64-bit roaring bitmaps. [#17858](https://github.com/ClickHouse/ClickHouse/pull/17858) ([Andy Yang](https://github.com/andyyzh)). -* Extended `OPTIMIZE ... DEDUPLICATE` syntax to allow an explicit (or implicit with asterisk/column transformers) list of columns to check for duplicates on. ... [#17846](https://github.com/ClickHouse/ClickHouse/pull/17846) ([Vasily Nemkov](https://github.com/Enmk)). -* Added functions `toModifiedJulianDay`, `fromModifiedJulianDay`, `toModifiedJulianDayOrNull`, and `fromModifiedJulianDayOrNull`. These functions convert between a Proleptic Gregorian calendar date and a Modified Julian Day number. [#17750](https://github.com/ClickHouse/ClickHouse/pull/17750) ([PHO](https://github.com/depressed-pho)). -* Add the ability to use a custom TLD list: added functions `firstSignificantSubdomainCustom`, `cutToFirstSignificantSubdomainCustom`. [#17748](https://github.com/ClickHouse/ClickHouse/pull/17748) ([Azat Khuzhin](https://github.com/azat)). -* Add support for the `PROXYv1` protocol to wrap the native TCP interface.
Allow quotas to be keyed by proxy-forwarded IP address (applied for `PROXYv1` address and for `X-Forwarded-For` from HTTP interface). This is useful when you provide access to ClickHouse only via a trusted proxy (e.g. CloudFlare) but want to account user resources by their original IP addresses. This fixes [#17268](https://github.com/ClickHouse/ClickHouse/issues/17268). [#17707](https://github.com/ClickHouse/ClickHouse/pull/17707) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Now clickhouse-client supports opening `EDITOR` to edit commands (`Alt-Shift-E`). [#17665](https://github.com/ClickHouse/ClickHouse/pull/17665) ([Amos Bird](https://github.com/amosbird)). -* Add function `encodeXMLComponent` to escape characters to place a string into an XML text node or attribute. [#17659](https://github.com/ClickHouse/ClickHouse/pull/17659) ([nauta](https://github.com/nautaa)). -* Introduce `DETACH TABLE/VIEW ... PERMANENTLY` syntax, so that the table does not reappear automatically after server restart (only by explicit request). The table can still be attached back using the short syntax ATTACH TABLE. Implements [#5555](https://github.com/ClickHouse/ClickHouse/issues/5555). Fixes [#13850](https://github.com/ClickHouse/ClickHouse/issues/13850). [#17642](https://github.com/ClickHouse/ClickHouse/pull/17642) ([filimonov](https://github.com/filimonov)). -* Add asynchronous metrics on total amount of rows, bytes and parts in MergeTree tables. This fixes [#11714](https://github.com/ClickHouse/ClickHouse/issues/11714). [#17639](https://github.com/ClickHouse/ClickHouse/pull/17639) ([flynn](https://github.com/ucasFL)). -* Add settings `limit` and `offset` for out-of-SQL pagination ([#16176](https://github.com/ClickHouse/ClickHouse/issues/16176)). They are useful for building APIs. These two settings will affect the SELECT query as if it is wrapped like `select * from (your_original_select_query) t limit xxx offset xxx;`. [#17633](https://github.com/ClickHouse/ClickHouse/pull/17633) ([hexiaoting](https://github.com/hexiaoting)). -* Provide a new aggregator combinator: `-SimpleState` to build `SimpleAggregateFunction` types via query. It's useful for defining a MaterializedView of AggregatingMergeTree engine, and will benefit projections too. [#16853](https://github.com/ClickHouse/ClickHouse/pull/16853) ([Amos Bird](https://github.com/amosbird)). -* Added `queries-file` parameter for `clickhouse-client` and `clickhouse-local`. [#15930](https://github.com/ClickHouse/ClickHouse/pull/15930) ([Maksim Kita](https://github.com/kitaisreal)). -* Added `query` parameter for `clickhouse-benchmark`. [#17832](https://github.com/ClickHouse/ClickHouse/pull/17832) ([Maksim Kita](https://github.com/kitaisreal)). -* `EXPLAIN AST` now supports queries other than `SELECT`. [#18136](https://github.com/ClickHouse/ClickHouse/pull/18136) ([taiyang-li](https://github.com/taiyang-li)). +* Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if all of its underlying tables support `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). +* More efficient handling of globs for URL storage. Now you can easily query millions of URLs in parallel with retries. Closes [#32866](https://github.com/ClickHouse/ClickHouse/issues/32866). [#32907](https://github.com/ClickHouse/ClickHouse/pull/32907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Avoid exponential backtracking in parser.
This closes [#20158](https://github.com/ClickHouse/ClickHouse/issues/20158). [#33481](https://github.com/ClickHouse/ClickHouse/pull/33481) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Abuse of `untuple` function was leading to exponential complexity of query analysis (found by fuzzer). This closes [#33297](https://github.com/ClickHouse/ClickHouse/issues/33297). [#33445](https://github.com/ClickHouse/ClickHouse/pull/33445) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Reduce allocated memory for dictionaries with string attributes. [#33466](https://github.com/ClickHouse/ClickHouse/pull/33466) ([Maksim Kita](https://github.com/kitaisreal)). +* Slight performance improvement of the `reinterpret` function. [#32587](https://github.com/ClickHouse/ClickHouse/pull/32587) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Non-significant change. In extremely rare cases when a data part is lost on every replica, after merging of some data parts, subsequent queries may skip a smaller number of partitions during partition pruning. This hardly affects anything. [#32220](https://github.com/ClickHouse/ClickHouse/pull/32220) ([Azat Khuzhin](https://github.com/azat)). +* Improve `clickhouse-keeper` writing performance by optimizing the size calculation logic. [#32366](https://github.com/ClickHouse/ClickHouse/pull/32366) ([zhanglistar](https://github.com/zhanglistar)). +* Optimize single part projection materialization. This closes [#31669](https://github.com/ClickHouse/ClickHouse/issues/31669). [#31885](https://github.com/ClickHouse/ClickHouse/pull/31885) ([Amos Bird](https://github.com/amosbird)). +* Improve query performance of system tables. [#33312](https://github.com/ClickHouse/ClickHouse/pull/33312) ([OnePiece](https://github.com/zhongyuankai)). +* Optimize selecting of MergeTree parts that can be moved between volumes. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([OnePiece](https://github.com/zhongyuankai)). +* Fix `sparse_hashed` dict performance with sequential keys (wrong hash function); see the sketch below. [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). #### Experimental Feature -* Added functions for calculation of minHash and simHash of text n-grams and shingles. They are intended for semi-duplicate search. Also functions `bitHammingDistance` and `tupleHammingDistance` are added. [#7649](https://github.com/ClickHouse/ClickHouse/pull/7649) ([flynn](https://github.com/ucasFL)). -* Add new data type `Map`. See [#1841](https://github.com/ClickHouse/ClickHouse/issues/1841). The first version of Map only supports `String` type of key and value. [#15806](https://github.com/ClickHouse/ClickHouse/pull/15806) ([hexiaoting](https://github.com/hexiaoting)). -* Implement alternative SQL parser based on ANTLR4 runtime and generated from EBNF grammar. [#11298](https://github.com/ClickHouse/ClickHouse/pull/11298) ([Ivan](https://github.com/abyss7)). - - #### Performance Improvement - -* New IP Dictionary implementation with lower memory consumption, improved performance for some cases, and fixed bugs. [#16804](https://github.com/ClickHouse/ClickHouse/pull/16804) ([vdimir](https://github.com/vdimir)). -* Parallel formatting for data export. [#11617](https://github.com/ClickHouse/ClickHouse/pull/11617) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
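For the `sparse_hashed` dictionary entry above, a minimal sketch of a dictionary using this layout; the dictionary name, attribute set, and source table `dict_source` are hypothetical:

```sql
-- Hypothetical dictionary using the sparse_hashed layout (lower memory usage
-- than plain hashed, at some lookup cost).
CREATE DICTIONARY dict_sparse
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'dict_source'))
LAYOUT(SPARSE_HASHED())
LIFETIME(MIN 0 MAX 300);
```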
-* LDAP integration: Added `verification_cooldown` parameter in LDAP server connection configuration to allow caching of successful "bind" attempts for a configurable period of time. [#15988](https://github.com/ClickHouse/ClickHouse/pull/15988) ([Denis Glazachev](https://github.com/traceon)). -* Add `--no-system-table` option for `clickhouse-local` to run without system tables. This avoids initialization of `DateLUT` that may take a noticeable amount of time (tens of milliseconds) at startup. [#18899](https://github.com/ClickHouse/ClickHouse/pull/18899) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Replace `PODArray` with `PODArrayWithStackMemory` in `AggregateFunctionWindowFunnelData` to improve `windowFunnel` function performance. [#18817](https://github.com/ClickHouse/ClickHouse/pull/18817) ([flynn](https://github.com/ucasFL)). -* Don't send empty blocks to shards on synchronous INSERT into a Distributed table. This closes [#14571](https://github.com/ClickHouse/ClickHouse/issues/14571). [#18775](https://github.com/ClickHouse/ClickHouse/pull/18775) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Optimized read for StorageMemory. [#18052](https://github.com/ClickHouse/ClickHouse/pull/18052) ([Maksim Kita](https://github.com/kitaisreal)). -* Use the Dragonbox algorithm for float to string conversion instead of ryu. This improves performance of float to string conversion significantly. [#17831](https://github.com/ClickHouse/ClickHouse/pull/17831) ([Maksim Kita](https://github.com/kitaisreal)). -* Speed up `IPv6CIDRToRange` implementation. [#17569](https://github.com/ClickHouse/ClickHouse/pull/17569) ([vdimir](https://github.com/vdimir)). -* Add `remerge_sort_lowered_memory_bytes_ratio` setting (if memory usage after remerge is not reduced by this ratio, remerge will be disabled). [#17539](https://github.com/ClickHouse/ClickHouse/pull/17539) ([Azat Khuzhin](https://github.com/azat)). -* Improve performance of AggregatingMergeTree with SimpleAggregateFunction(String) in PK. [#17109](https://github.com/ClickHouse/ClickHouse/pull/17109) ([Azat Khuzhin](https://github.com/azat)). -* Now the `-If` combinator is devirtualized, and `count` is properly vectorized. It is for [this PR](https://github.com/ClickHouse/ClickHouse/pull/17041). [#17043](https://github.com/ClickHouse/ClickHouse/pull/17043) ([Amos Bird](https://github.com/amosbird)). -* Fix performance of reading from `Merge` tables over a huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). -* Improved performance of function `repeat`. [#16937](https://github.com/ClickHouse/ClickHouse/pull/16937) ([satanson](https://github.com/satanson)). -* Slightly improved performance of float parsing. [#16809](https://github.com/ClickHouse/ClickHouse/pull/16809) ([Maksim Kita](https://github.com/kitaisreal)). -* Add possibility to skip merged partitions for `OPTIMIZE TABLE ... FINAL` (see the sketch below). [#15939](https://github.com/ClickHouse/ClickHouse/pull/15939) ([Kruglov Pavel](https://github.com/Avogar)). -* Integrate with [fast_float from Daniel Lemire](https://github.com/lemire/fast_float) to parse floating point numbers. [#16787](https://github.com/ClickHouse/ClickHouse/pull/16787) ([Maksim Kita](https://github.com/kitaisreal)). It is not enabled, because its performance is still lower than the rough float parser in ClickHouse.
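A sketch for the `OPTIMIZE TABLE ... FINAL` entry above; the controlling setting name `optimize_skip_merged_partitions` and the table name are assumptions, since the entry itself does not spell them out:

```sql
-- Assumed setting name: partitions that already consist of a single merged
-- part are skipped instead of being rewritten.
SET optimize_skip_merged_partitions = 1;
OPTIMIZE TABLE t FINAL;  -- hypothetical table name
```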
-* Fix max_distributed_connections (affects `prefer_localhost_replica = 1` and `max_threads != max_distributed_connections`). [#17848](https://github.com/ClickHouse/ClickHouse/pull/17848) ([Azat Khuzhin](https://github.com/azat)). -* Adaptive choice of single/multi part upload when sending data to S3. Single part upload is controlled by a new setting `max_single_part_upload_size`. [#17934](https://github.com/ClickHouse/ClickHouse/pull/17934) ([Pavel Kovalenko](https://github.com/Jokser)). -* Support for async tasks in `PipelineExecutor`. Initial support of async sockets for remote queries. [#17868](https://github.com/ClickHouse/ClickHouse/pull/17868) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Allow to use the `optimize_move_to_prewhere` optimization with compact parts, when sizes of columns are unknown. [#17330](https://github.com/ClickHouse/ClickHouse/pull/17330) ([Anton Popov](https://github.com/CurtizJ)). +* Parallel reading from multiple replicas within a shard during distributed query without using sample key. To enable this, set `allow_experimental_parallel_reading_from_replicas = 1` and `max_parallel_replicas` to any number (see the sketch below). This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns which contain a lot of default (zero) values. It can be enabled by the setting `ratio_for_sparse_serialization`. Sparse serialization will be chosen dynamically for a column if its ratio of default values to all values is above that threshold. Serialization (default or sparse) is fixed for every column in a part, but may vary between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). +* Add "TABLE OVERRIDE" feature for customizing MaterializedMySQL table schemas. [#32325](https://github.com/ClickHouse/ClickHouse/pull/32325) ([Stig Bakken](https://github.com/stigsb)). +* Add `EXPLAIN TABLE OVERRIDE` query. [#32836](https://github.com/ClickHouse/ClickHouse/pull/32836) ([Stig Bakken](https://github.com/stigsb)). +* Support TABLE OVERRIDE clause for MaterializedPostgreSQL. RFC: [#31480](https://github.com/ClickHouse/ClickHouse/issues/31480). [#32749](https://github.com/ClickHouse/ClickHouse/pull/32749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change ZooKeeper path for zero-copy marks for shared data. Note that "zero-copy replication" is a non-production feature (in early stages of development) that you shouldn't use anyway. But if you have used it, keep this change in mind. [#32061](https://github.com/ClickHouse/ClickHouse/pull/32061) ([ianton-ru](https://github.com/ianton-ru)). +* Support for the `EVENTS` clause in `WINDOW VIEW` watch queries. [#32607](https://github.com/ClickHouse/ClickHouse/pull/32607) ([vxider](https://github.com/Vxider)). +* Fix ACL with explicit digit hash in `clickhouse-keeper`: now the behavior is consistent with ZooKeeper and the generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)). [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246). +* Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)).
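To try the parallel reading from replicas described above, both settings named in the entry are session-level; the table name is hypothetical:

```sql
SET allow_experimental_parallel_reading_from_replicas = 1;
SET max_parallel_replicas = 3;
-- Replicas of each shard now cooperate on the same read instead of one
-- replica serving the whole shard (hypothetical distributed table).
SELECT count() FROM distributed_table;
```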
#### Improvement -* Avoid deadlock when executing INSERT SELECT into itself from a table with `TinyLog` or `Log` table engines. This closes [#6802](https://github.com/ClickHouse/ClickHouse/issues/6802). This closes [#18691](https://github.com/ClickHouse/ClickHouse/issues/18691). This closes [#16812](https://github.com/ClickHouse/ClickHouse/issues/16812). This closes [#14570](https://github.com/ClickHouse/ClickHouse/issues/14570). [#15260](https://github.com/ClickHouse/ClickHouse/pull/15260) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support `SHOW CREATE VIEW name` syntax like [MySQL](https://dev.mysql.com/doc/refman/5.7/en/show-create-view.html). [#18095](https://github.com/ClickHouse/ClickHouse/pull/18095) ([Du Chuan](https://github.com/spongedu)). -* All queries of type `Decimal * Float` or vice versa are allowed, including aggregate ones (e.g. `SELECT sum(decimal_field * 1.1)` or `SELECT dec_col * float_col`); the result type is Float32 or Float64. [#18145](https://github.com/ClickHouse/ClickHouse/pull/18145) ([Mike](https://github.com/myrrc)). -* Improved minimal Web UI: add history; add sharing support; avoid race condition of different requests; add request in-flight and ready indicators; add favicon; detect Ctrl+Enter if textarea is not in focus. [#17293](https://github.com/ClickHouse/ClickHouse/pull/17293) [#17770](https://github.com/ClickHouse/ClickHouse/pull/17770) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* clickhouse-server didn't send a `close` request to the ZooKeeper server; this is fixed. [#16837](https://github.com/ClickHouse/ClickHouse/pull/16837) ([alesapin](https://github.com/alesapin)). -* Avoid server abnormal termination in case of too low memory limits (`max_memory_usage = 1` / `max_untracked_memory = 1`). [#17453](https://github.com/ClickHouse/ClickHouse/pull/17453) ([Azat Khuzhin](https://github.com/azat)). -* Fix non-deterministic result of `windowFunnel` function in case of the same timestamp for different events. [#18884](https://github.com/ClickHouse/ClickHouse/pull/18884) ([Fuwang Hu](https://github.com/fuwhu)). -* Docker: Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server Docker images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). -* Asynchronous INSERTs to `Distributed` tables: Two new settings (by analogy with MergeTree family) have been added: - `fsync_after_insert` - Do fsync for every insert. Will decrease performance of inserts. - `fsync_directories` - Do fsync for temporary directory (that is used for async INSERT only) after all operations (writes, renames, etc.). [#18864](https://github.com/ClickHouse/ClickHouse/pull/18864) ([Azat Khuzhin](https://github.com/azat)). -* `SYSTEM KILL` command started to work in Docker. This closes [#18847](https://github.com/ClickHouse/ClickHouse/issues/18847). [#18848](https://github.com/ClickHouse/ClickHouse/pull/18848) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Expand macros in the zk path when executing `FETCH PARTITION`. [#18839](https://github.com/ClickHouse/ClickHouse/pull/18839) ([fastio](https://github.com/fastio)). -* Apply `ALTER TABLE ON CLUSTER MODIFY SETTING ...` to all replicas, because we don't replicate such alter commands. [#18789](https://github.com/ClickHouse/ClickHouse/pull/18789) ([Amos Bird](https://github.com/amosbird)). -* Allow column transformer `EXCEPT` to accept a string as regular expression matcher.
This resolves [#18685](https://github.com/ClickHouse/ClickHouse/issues/18685). [#18699](https://github.com/ClickHouse/ClickHouse/pull/18699) ([Amos Bird](https://github.com/amosbird)). -* Fix SimpleAggregateFunction in SummingMergeTree. Now it works like AggregateFunction. In previous versions values were summed together regardless of the aggregate function. This fixes [#18564](https://github.com/ClickHouse/ClickHouse/issues/18564). [#8052](https://github.com/ClickHouse/ClickHouse/issues/8052). [#18637](https://github.com/ClickHouse/ClickHouse/pull/18637) ([Amos Bird](https://github.com/amosbird)). Another fix of using `SimpleAggregateFunction` in `SummingMergeTree`. This fixes [#18676](https://github.com/ClickHouse/ClickHouse/issues/18676). [#18677](https://github.com/ClickHouse/ClickHouse/pull/18677) ([Amos Bird](https://github.com/amosbird)). -* Fixed assertion error inside the allocator in the case when the last argument of the function `bar` is NaN. Now a simple ClickHouse exception is thrown. This fixes [#17876](https://github.com/ClickHouse/ClickHouse/issues/17876). [#18520](https://github.com/ClickHouse/ClickHouse/pull/18520) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix usability issue: no newline after exception message in some tools. [#18444](https://github.com/ClickHouse/ClickHouse/pull/18444) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add ability to modify primary and partition key column type from `LowCardinality(Type)` to `Type` and vice versa. Also add an ability to modify primary key column type from `EnumX` to `IntX` type. Fixes [#5604](https://github.com/ClickHouse/ClickHouse/issues/5604). [#18362](https://github.com/ClickHouse/ClickHouse/pull/18362) ([alesapin](https://github.com/alesapin)). -* Implement `untuple` field access. [#18133](https://github.com/ClickHouse/ClickHouse/issues/18133). [#18309](https://github.com/ClickHouse/ClickHouse/pull/18309) ([hexiaoting](https://github.com/hexiaoting)). -* Allow to parse Array fields from CSV if they are represented as a string containing an array that was serialized as nested CSV. Example: `"[""Hello"", ""world"", ""42"""" TV""]"` will parse as `['Hello', 'world', '42" TV']`. Allow to parse an array in CSV in a string without enclosing braces. Example: `"'Hello', 'world', '42"" TV'"` will parse as `['Hello', 'world', '42" TV']`. [#18271](https://github.com/ClickHouse/ClickHouse/pull/18271) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Make better adaptive granularity calculation for merge tree wide parts. [#18223](https://github.com/ClickHouse/ClickHouse/pull/18223) ([alesapin](https://github.com/alesapin)). -* Now `clickhouse install` works on Mac. The problem was that there is no procfs on this platform. [#18201](https://github.com/ClickHouse/ClickHouse/pull/18201) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Better hints for `SHOW ...` query syntax. [#18183](https://github.com/ClickHouse/ClickHouse/pull/18183) ([Du Chuan](https://github.com/spongedu)). -* Array aggregation `arrayMin`, `arrayMax`, `arraySum`, `arrayAvg` support for `Int128`, `Int256`, `UInt256` (see the sketch below). [#18147](https://github.com/ClickHouse/ClickHouse/pull/18147) ([Maksim Kita](https://github.com/kitaisreal)). -* Add `disk` to Set and Join storage settings. [#18112](https://github.com/ClickHouse/ClickHouse/pull/18112) ([Grigory Pervakov](https://github.com/GrigoryPervakov)).
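For the array-aggregation entry above, a quick illustration of `arraySum` over the newly supported big-integer types:

```sql
-- arraySum keeps the wide type instead of overflowing a narrower accumulator.
SELECT arraySum([toInt128(1), toInt128(2), toInt128(3)]) AS s, toTypeName(s);
```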
-* Access control: Now table function `merge()` requires the current user to have the `SELECT` privilege on each table it receives data from. This PR fixes [#16964](https://github.com/ClickHouse/ClickHouse/issues/16964). [#18104](https://github.com/ClickHouse/ClickHouse/pull/18104) [#17983](https://github.com/ClickHouse/ClickHouse/pull/17983) ([Vitaly Baranov](https://github.com/vitlibar)). -* Temporary tables are visible in the system tables `system.tables` and `system.columns` now only in the session where they have been created. The internal database `_temporary_and_external_tables` is now hidden in those system tables; temporary tables are shown as tables with empty database with the `is_temporary` flag set instead. [#18014](https://github.com/ClickHouse/ClickHouse/pull/18014) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix clickhouse-client rendering issue when the size of the terminal window changes. [#18009](https://github.com/ClickHouse/ClickHouse/pull/18009) ([Amos Bird](https://github.com/amosbird)). -* Decrease log verbosity of the events when the client drops the connection from Warning to Information. [#18005](https://github.com/ClickHouse/ClickHouse/pull/18005) ([filimonov](https://github.com/filimonov)). -* Forcibly remove empty or bad metadata files from the filesystem for DiskS3. S3 is an experimental feature. [#17935](https://github.com/ClickHouse/ClickHouse/pull/17935) ([Pavel Kovalenko](https://github.com/Jokser)). -* Access control: `allow_introspection_functions=0` prohibits usage of introspection functions but doesn't prohibit giving grants for them anymore (the grantee will need to set `allow_introspection_functions=1` for themselves to be able to use that grant). Similarly `allow_ddl=0` prohibits usage of DDL commands but doesn't prohibit giving grants for them anymore. See the sketch below. [#17908](https://github.com/ClickHouse/ClickHouse/pull/17908) ([Vitaly Baranov](https://github.com/vitlibar)). -* Usability improvement: hints for column names. [#17112](https://github.com/ClickHouse/ClickHouse/issues/17112). [#17857](https://github.com/ClickHouse/ClickHouse/pull/17857) ([fastio](https://github.com/fastio)). -* Add diagnostic information when two merge tables try to read each other's data. [#17854](https://github.com/ClickHouse/ClickHouse/pull/17854) ([徐炘](https://github.com/weeds085490)). -* Allow overriding the timeout value for running a script using the ClickHouse Docker image. [#17818](https://github.com/ClickHouse/ClickHouse/pull/17818) ([Guillaume Tassery](https://github.com/YiuRULE)). -* Check system log tables' engine definition grammar to prevent some configuration errors. Note that this grammar check is not semantic; mistakes such as non-existent columns / expression functions will not be found until the table is created. [#17739](https://github.com/ClickHouse/ClickHouse/pull/17739) ([Du Chuan](https://github.com/spongedu)). -* Removed exception throwing at `RabbitMQ` table initialization if there was no connection (it will be reconnecting in the background). [#17709](https://github.com/ClickHouse/ClickHouse/pull/17709) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Do not ignore server memory limits during Buffer flush. [#17646](https://github.com/ClickHouse/ClickHouse/pull/17646) ([Azat Khuzhin](https://github.com/azat)). -* Switch to patched version of RocksDB (from ClickHouse-Extras) to fix a use-after-free error. [#17643](https://github.com/ClickHouse/ClickHouse/pull/17643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
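A sketch of the grant flow from the `allow_introspection_functions` entry above; `INTROSPECTION` is the relevant privilege group, and the user name is hypothetical:

```sql
-- The admin may grant the privilege even while allow_introspection_functions = 0:
GRANT INTROSPECTION ON *.* TO analyst;
-- The grantee must still enable the functions in their own session to use the grant:
SET allow_introspection_functions = 1;
```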
-* Added an offset to the exception message for parallel parsing. This fixes [#17457](https://github.com/ClickHouse/ClickHouse/issues/17457). [#17641](https://github.com/ClickHouse/ClickHouse/pull/17641) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Don't throw "Too many parts" error in the middle of an INSERT query. [#17566](https://github.com/ClickHouse/ClickHouse/pull/17566) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow query parameters in the UPDATE statement of an ALTER query. Fixes [#10976](https://github.com/ClickHouse/ClickHouse/issues/10976). [#17563](https://github.com/ClickHouse/ClickHouse/pull/17563) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Query obfuscator: avoid usage of some SQL keywords for identifier names. [#17526](https://github.com/ClickHouse/ClickHouse/pull/17526) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Export the current max ddl entry executed by DDLWorker via a server metric. It's useful to check if DDLWorker hangs somewhere. [#17464](https://github.com/ClickHouse/ClickHouse/pull/17464) ([Amos Bird](https://github.com/amosbird)). -* Export asynchronous metrics of all of the server's current threads. It's useful to track down issues like [this](https://github.com/ClickHouse-Extras/poco/pull/28). [#17463](https://github.com/ClickHouse/ClickHouse/pull/17463) ([Amos Bird](https://github.com/amosbird)). -* Include dynamic columns like MATERIALIZED / ALIAS for wildcard queries when the settings `asterisk_include_materialized_columns` and `asterisk_include_alias_columns` are turned on. [#17462](https://github.com/ClickHouse/ClickHouse/pull/17462) ([Ken Chen](https://github.com/chenziliang)). -* Allow specifying [TTL](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#mergetree-table-ttl) to remove old entries from [system log tables](https://clickhouse.com/docs/en/operations/system-tables/), using the `<ttl>` attribute in `config.xml`. [#17438](https://github.com/ClickHouse/ClickHouse/pull/17438) ([Du Chuan](https://github.com/spongedu)). -* Now queries coming to the server via MySQL and PostgreSQL protocols have distinctive interface types (which can be seen in the `interface` column of the table `system.query_log`): `4` for MySQL, and `5` for PostgreSQL, instead of the formerly used `1` which is now used for the native protocol only (see the sketch below). [#17437](https://github.com/ClickHouse/ClickHouse/pull/17437) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix parsing of the SETTINGS clause of the `INSERT ... SELECT ... SETTINGS` query. [#17414](https://github.com/ClickHouse/ClickHouse/pull/17414) ([Azat Khuzhin](https://github.com/azat)). -* Correctly account memory in RadixSort. [#17412](https://github.com/ClickHouse/ClickHouse/pull/17412) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add EOF check in `receiveHello` in the server to prevent getting an `Attempt to read after eof` exception. [#17365](https://github.com/ClickHouse/ClickHouse/pull/17365) ([Kruglov Pavel](https://github.com/Avogar)). -* Avoid possible stack overflow in bigint conversion. Big integers are experimental. [#17269](https://github.com/ClickHouse/ClickHouse/pull/17269) ([flynn](https://github.com/ucasFL)). -* Now `set` indices will work with `GLOBAL IN`. This fixes [#17232](https://github.com/ClickHouse/ClickHouse/issues/17232), [#5576](https://github.com/ClickHouse/ClickHouse/issues/5576). [#17253](https://github.com/ClickHouse/ClickHouse/pull/17253) ([Amos Bird](https://github.com/amosbird)).
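Given the `interface` values described above (`1` native, `4` MySQL, `5` PostgreSQL), a quick way to see the per-protocol breakdown:

```sql
-- 1 = native TCP, 4 = MySQL, 5 = PostgreSQL (per the entry above).
SELECT interface, count() AS queries
FROM system.query_log
GROUP BY interface;
```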
-* Add a limit for http redirects in requests to S3 storage (`s3_max_redirects`). [#17220](https://github.com/ClickHouse/ClickHouse/pull/17220) ([ianton-ru](https://github.com/ianton-ru)). -* When the `-OrNull` combinator is combined with `-If`, `-Merge`, `-MergeState`, or `-State` combinators, `-OrNull` should be put in front. [#16935](https://github.com/ClickHouse/ClickHouse/pull/16935) ([flynn](https://github.com/ucasFL)). -* Support HTTP proxy and HTTPS S3 endpoint configuration. [#16861](https://github.com/ClickHouse/ClickHouse/pull/16861) ([Pavel Kovalenko](https://github.com/Jokser)). -* Added proper authentication using environment, `~/.aws` and `AssumeRole` for the S3 client. [#16856](https://github.com/ClickHouse/ClickHouse/pull/16856) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Add more OpenTelemetry spans. Add an example of how to export the span data to Zipkin. [#16535](https://github.com/ClickHouse/ClickHouse/pull/16535) ([Alexander Kuzmenkov](https://github.com/akuzm)). -* Cache dictionaries: Completely eliminate callbacks and locks for acquiring them. Keys are not divided into "not found" and "expired", but stored in the same map during the query. [#14958](https://github.com/ClickHouse/ClickHouse/pull/14958) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix `fsync_part_directory`/`fsync_after_insert`/`in_memory_parts_insert_sync`, which never worked (experimental feature). [#18845](https://github.com/ClickHouse/ClickHouse/pull/18845) ([Azat Khuzhin](https://github.com/azat)). -* Allow using `Atomic` engine for the nested database of the `MaterializeMySQL` engine. [#14849](https://github.com/ClickHouse/ClickHouse/pull/14849) ([tavplubix](https://github.com/tavplubix)). - - #### Bug Fix - -* Fix the issue when the server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) (Amos Bird, [alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix index analysis of binary functions with a constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). -* Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)). -* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). -* Restrict merges from wide to compact parts. In case of vertical merge it led to a broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix use-after-free bug in the `rocksdb` library. [#18862](https://github.com/ClickHouse/ClickHouse/pull/18862) ([sundyli](https://github.com/sundy-li)).
-* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix bug in merge tree data writer which can lead to marks with size bigger than the fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). -* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and threw exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). -* Restrict `MODIFY TTL` queries for `MergeTree` tables created in the old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). -* Make sure `groupUniqArray` returns the correct type for arguments of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix possible error `Expected single dictionary argument for function` when using function `ignore` with a `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix inserting of a `LowCardinality` column to a table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Join tries to materialize const columns, but our code wants them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Disable `optimize_move_functions_out_of_any` because the optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)). -* Fix incorrect behavior when the `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). -* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804).
[#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). -* Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)). -* Fix possible hang at shutdown in `clickhouse-local`. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Queries for external databases (MySQL, ODBC, JDBC) were incorrectly rewritten if there was an expression in the form of `x IN table`. This fixes [#9756](https://github.com/ClickHouse/ClickHouse/issues/9756). [#18876](https://github.com/ClickHouse/ClickHouse/pull/18876) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix `*If` combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). -* Fix the issue that asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to a non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse a decimal from a nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). -* Fix minor issue with logging. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). -* Fix removing of empty parts in `ReplicatedMergeTree` tables created with the old syntax. Fixes [#18582](https://github.com/ClickHouse/ClickHouse/issues/18582). [#18614](https://github.com/ClickHouse/ClickHouse/pull/18614) ([Anton Popov](https://github.com/CurtizJ)). -* Fix a previous bug with date overflow for different values: `Date` values are now strictly limited to "2106-02-07", and dates greater than "2106-02-07" are cast to 0. [#18565](https://github.com/ClickHouse/ClickHouse/pull/18565) ([hexiaoting](https://github.com/hexiaoting)). -* Add FixedString data type support for replication from MySQL. Replication from MySQL is an experimental feature. This patch fixes [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450). Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)). -* Fix possible `Pipeline stuck` error while using `ORDER BY` after a subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix bug which may lead to `ALTER` queries hanging after the corresponding mutation is killed. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). -* Proper support for 12AM in `parseDateTimeBestEffort` function. This fixes [#18402](https://github.com/ClickHouse/ClickHouse/issues/18402).
[#18449](https://github.com/ClickHouse/ClickHouse/pull/18449) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). -* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with an argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing an exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). -* Fix the unexpected behaviour of `SHOW TABLES`. [#18431](https://github.com/ClickHouse/ClickHouse/pull/18431) ([fastio](https://github.com/fastio)). -* Fix `-SimpleState` combinator generating incompatible argument type and return type. [#18404](https://github.com/ClickHouse/ClickHouse/pull/18404) ([Amos Bird](https://github.com/amosbird)). -* Fix possible race condition in concurrent usage of `Set` or `Join` tables and selects from `system.tables`. [#18385](https://github.com/ClickHouse/ClickHouse/pull/18385) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix possible crashes in aggregate functions with combinator `Distinct` while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). -* Fixed issue when the `clickhouse-odbc-bridge` process is unreachable by the server on machines with a dual IPv4/IPv6 stack; fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes of the odbc-bridge process; possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)). -* Access control: `SELECT count() FROM table` now can be executed if the user has access to at least a single column from the table. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). -* Access control: `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)). -* Replication from MySQL (experimental feature). Fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186). Fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372). Fix unique key conversion issue in the MaterializeMySQL database engine. [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)). -* Fix inconsistency for queries with both `WITH FILL` and `WITH TIES` [#17466](https://github.com/ClickHouse/ClickHouse/issues/17466). [#18188](https://github.com/ClickHouse/ClickHouse/pull/18188) ([hexiaoting](https://github.com/hexiaoting)).
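For the `WITH FILL` / `WITH TIES` fix above, a minimal query of the affected kind, filling gaps in an ordered result:

```sql
-- Rows exist for n = 0, 3, 6, 9; WITH FILL inserts the missing ordinals.
SELECT number AS n
FROM numbers(10)
WHERE n % 3 = 0
ORDER BY n WITH FILL;
```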
-* Fix inserting a row with a default value in case of a parsing error in the last column. Fixes [#17712](https://github.com/ClickHouse/ClickHouse/issues/17712). [#18182](https://github.com/ClickHouse/ClickHouse/pull/18182) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Fix `Unknown setting profile` error on attempt to set a settings profile. [#18167](https://github.com/ClickHouse/ClickHouse/pull/18167) ([tavplubix](https://github.com/tavplubix)). -* Fix error when the query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove the column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). -* Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Fix comparison of `DateTime64` and `Date`. Fixes [#13804](https://github.com/ClickHouse/ClickHouse/issues/13804) and [#11222](https://github.com/ClickHouse/ClickHouse/issues/11222). ... [#18050](https://github.com/ClickHouse/ClickHouse/pull/18050) ([Vasily Nemkov](https://github.com/Enmk)). -* Replication from MySQL (experimental feature): Fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187). Fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912). Support converting MySQL prefix index for MaterializeMySQL. [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)). -* When server log rotation was configured using the `logger.size` parameter with a numeric value larger than 2^32, the logs were not rotated properly. This is fixed. [#17905](https://github.com/ClickHouse/ClickHouse/pull/17905) ([Alexander Kuzmenkov](https://github.com/akuzm)). -* Trivial query optimization was producing a wrong result if the query contains ARRAY JOIN (so the query is actually non-trivial). [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)). -* Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)). -* WAL (experimental feature): Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). -* Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fixed possible segfault when there is not enough space when inserting into a `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)). -* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). -* Windows: Fixed `Function not implemented` error when executing a `RENAME` query in an `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)). -* It might be determined incorrectly whether the cluster is circular- (cross-) replicated or not when executing an `ON CLUSTER` query, due to a race condition when `pool_size` > 1. It's fixed.
[#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([tavplubix](https://github.com/tavplubix)). -* Fix empty `system.stack_trace` table when the server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)). -* Exception `fmt::v7::format_error` can be logged in the background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* When clickhouse-client is used in interactive mode with multiline queries, a single-line comment was erroneously extended to the end of the query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix alter query hang when the corresponding mutation was killed on a different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). -* Fix issue with memory accounting when the mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). -* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). -* Fix duplicates after `DISTINCT` which were possible because of an incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed high CPU usage in background tasks of *MergeTree tables. [#17416](https://github.com/ClickHouse/ClickHouse/pull/17416) ([tavplubix](https://github.com/tavplubix)). -* Fix possible crash while reading from a `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Replication from MySQL (experimental feature): Fixes [#16835](https://github.com/ClickHouse/ClickHouse/issues/16835): fix mismatched header with MySQL SHOW statement. [#17366](https://github.com/ClickHouse/ClickHouse/pull/17366) ([Winter Zhang](https://github.com/zhang2014)). -* Fix nondeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). -* Fix possible `Unexpected packet Data received from client` error for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)). -* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246). [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
-* clickhouse-copier: Fix for non-partitioned tables [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). [#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). -* Fixed possible not-working mutations for parts stored on an S3 disk (experimental feature). [#17227](https://github.com/ClickHouse/ClickHouse/pull/17227) ([Pavel Kovalenko](https://github.com/Jokser)). -* Bug fix for function `fuzzBits`, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). -* Fix `optimize_distributed_group_by_sharding_key` for queries with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)). -* Fix queries from `Merge` tables over `Distributed` tables with JOINs. [#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)). -* Fix order by optimization with monotonic functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)). -* Fix incorrect comparison of `DateTime64` types with different scales. Fixes [#16655](https://github.com/ClickHouse/ClickHouse/issues/16655) ... [#16952](https://github.com/ClickHouse/ClickHouse/pull/16952) ([Vasily Nemkov](https://github.com/Enmk)). -* Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)). -* Minor fix in SHOW ACCESS query. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)). -* Fix the behaviour with enabled `optimize_trivial_count_query` setting with partition predicate. [#16767](https://github.com/ClickHouse/ClickHouse/pull/16767) ([Azat Khuzhin](https://github.com/azat)). -* Return the number of affected rows for INSERT queries via the MySQL wire protocol. Previously ClickHouse always returned 0; it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). -* Fix inconsistent behavior caused by `select_sequential_consistency` for optimized trivial count query and system tables. [#16309](https://github.com/ClickHouse/ClickHouse/pull/16309) ([Hao Chen](https://github.com/haoch)). -* Throw an error when the `REPLACE` column transformer operates on a non-existing column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)). -* Throw an exception in case of a non-equi-join ON expression in RIGHT|FULL JOIN. [#15162](https://github.com/ClickHouse/ClickHouse/pull/15162) ([Artem Zuikov](https://github.com/4ertus2)). +* Now date-time conversion functions that generate time before `1970-01-01 00:00:00` will be saturated to zero instead of overflowing. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if a date truncation function would yield a result before the Unix epoch. +* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client.
[#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). +* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, the error message will contain suggestions for setting names similar to the invalid setting string (if any exist). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). +* Support hints for mistyped setting names for clickhouse-client and clickhouse-local. Closes [#32237](https://github.com/ClickHouse/ClickHouse/issues/32237). [#32841](https://github.com/ClickHouse/ClickHouse/pull/32841) ([凌涛](https://github.com/lingtaolf)). +* Allow to use virtual columns in Materialized Views. Close [#11210](https://github.com/ClickHouse/ClickHouse/issues/11210). [#33482](https://github.com/ClickHouse/ClickHouse/pull/33482) ([OnePiece](https://github.com/zhongyuankai)). +* Add config to disable IPv6 in clickhouse-keeper if needed. This closes [#33381](https://github.com/ClickHouse/ClickHouse/issues/33381). [#33450](https://github.com/ClickHouse/ClickHouse/pull/33450) ([Wu Xueyang](https://github.com/wuxueyang96)). +* Add more info to `system.build_options` about the current git revision. [#33431](https://github.com/ClickHouse/ClickHouse/pull/33431) ([taiyang-li](https://github.com/taiyang-li)). +* `clickhouse-local`: track memory under the `--max_memory_usage_in_client` option. [#33341](https://github.com/ClickHouse/ClickHouse/pull/33341) ([Azat Khuzhin](https://github.com/azat)). +* Allow negative intervals in function `intervalLengthSum`. Their length will be added as well. This closes [#33323](https://github.com/ClickHouse/ClickHouse/issues/33323). [#33335](https://github.com/ClickHouse/ClickHouse/pull/33335) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* `LineAsString` can be used as output format. This closes [#30919](https://github.com/ClickHouse/ClickHouse/issues/30919). [#33331](https://github.com/ClickHouse/ClickHouse/pull/33331) ([Sergei Trifonov](https://github.com/serxa)). +* Support `` in cluster configuration, as an alternative form of `1`. Close [#33270](https://github.com/ClickHouse/ClickHouse/issues/33270). [#33330](https://github.com/ClickHouse/ClickHouse/pull/33330) ([SuperDJY](https://github.com/cmsxbc)). +* Pressing Ctrl+C twice will terminate `clickhouse-benchmark` immediately without waiting for in-flight queries. This closes [#32586](https://github.com/ClickHouse/ClickHouse/issues/32586). [#33303](https://github.com/ClickHouse/ClickHouse/pull/33303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support Unix timestamps with milliseconds in the `parseDateTimeBestEffort` function. [#33276](https://github.com/ClickHouse/ClickHouse/pull/33276) ([Ben](https://github.com/benbiti)). +* Allow to cancel a query while reading data from an external table in the formats `Arrow` / `Parquet` / `ORC` - it failed to be cancelled in case of big files and with the setting `input_format_allow_seeks` set to false. Closes [#29678](https://github.com/ClickHouse/ClickHouse/issues/29678). [#33238](https://github.com/ClickHouse/ClickHouse/pull/33238) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If a table engine supports the `SETTINGS` clause, allow passing the settings as key-value pairs or via config. Add this support for MySQL.
[#33231](https://github.com/ClickHouse/ClickHouse/pull/33231) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly prevent Nullable primary keys if necessary. This is for [#32780](https://github.com/ClickHouse/ClickHouse/issues/32780). [#33218](https://github.com/ClickHouse/ClickHouse/pull/33218) ([Amos Bird](https://github.com/amosbird)). +* Add retry for `PostgreSQL` connections in case nothing has been fetched yet. Closes [#33199](https://github.com/ClickHouse/ClickHouse/issues/33199). [#33209](https://github.com/ClickHouse/ClickHouse/pull/33209) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Validate config keys for external dictionaries. [#33095](https://github.com/ClickHouse/ClickHouse/issues/33095#issuecomment-1000577517). [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Send profile info inside `clickhouse-local`. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Short-circuit evaluation: support for function `throwIf`. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `Date32` date type support in dictionaries. Closes [#32913](https://github.com/ClickHouse/ClickHouse/issues/32913). [#32971](https://github.com/ClickHouse/ClickHouse/pull/32971) ([Maksim Kita](https://github.com/kitaisreal)). +* (This only happens in unofficial builds). Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939). [#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)). +* Added support for specifying a subquery as an SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)). +* Flush all In-Memory data parts when WAL is not enabled while shutting down the server or detaching a table. [#32742](https://github.com/ClickHouse/ClickHouse/pull/32742) ([nauta](https://github.com/nautaa)). +* Allow to control connection timeouts for MySQL (previously it was supported only for the dictionary source). Closes [#16669](https://github.com/ClickHouse/ClickHouse/issues/16669). Previously the default `connect_timeout` was rather small; now it is configurable. [#32734](https://github.com/ClickHouse/ClickHouse/pull/32734) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support `authSource` option for storage `MongoDB`. Closes [#32594](https://github.com/ClickHouse/ClickHouse/issues/32594). [#32702](https://github.com/ClickHouse/ClickHouse/pull/32702) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support `Date32` type in the `generateRandom` table function. [#32643](https://github.com/ClickHouse/ClickHouse/pull/32643) ([nauta](https://github.com/nautaa)). +* Add settings `max_concurrent_select_queries` and `max_concurrent_insert_queries` to control concurrent queries by query kind.
+* Improve handling of nested structures with missing columns while reading data in the `Protobuf` format. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/31988. [#32531](https://github.com/ClickHouse/ClickHouse/pull/32531) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Allow empty credentials for the `MongoDB` engine. Closes [#26267](https://github.com/ClickHouse/ClickHouse/issues/26267). [#32460](https://github.com/ClickHouse/ClickHouse/pull/32460) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Disable some optimizations for window functions that may lead to exceptions. Closes [#31535](https://github.com/ClickHouse/ClickHouse/issues/31535). Closes [#31620](https://github.com/ClickHouse/ClickHouse/issues/31620). [#32453](https://github.com/ClickHouse/ClickHouse/pull/32453) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow connecting to MongoDB 5.0. Closes [#31483](https://github.com/ClickHouse/ClickHouse/issues/31483). [#32416](https://github.com/ClickHouse/ClickHouse/pull/32416) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Enable comparison between `Decimal` and `Float`; see the example below. Closes [#22626](https://github.com/ClickHouse/ClickHouse/issues/22626). [#31966](https://github.com/ClickHouse/ClickHouse/pull/31966) ([flynn](https://github.com/ucasFL)).
+* Added settings `command_read_timeout`, `command_write_timeout` for `StorageExecutable`, `StorageExecutablePool`, `ExecutableDictionary`, `ExecutablePoolDictionary`, `ExecutableUserDefinedFunctions`. Setting `command_read_timeout` controls the timeout for reading data from command stdout in milliseconds. Setting `command_write_timeout` controls the timeout for writing data to command stdin in milliseconds. Added setting `command_termination_timeout` for `ExecutableUserDefinedFunction`, `ExecutableDictionary`, `StorageExecutable`. Added setting `execute_direct` for `ExecutableUserDefinedFunction`, true by default. Added setting `execute_direct` for `ExecutableDictionary`, `ExecutablePoolDictionary`, false by default. [#30957](https://github.com/ClickHouse/ClickHouse/pull/30957) ([Maksim Kita](https://github.com/kitaisreal)).
+* Bitmap aggregate functions will give the correct result for out-of-range arguments instead of wrapping around. [#33127](https://github.com/ClickHouse/ClickHouse/pull/33127) ([DR](https://github.com/freedomDR)).
+* Fix parsing incorrect queries with the `FROM INFILE` statement. [#33521](https://github.com/ClickHouse/ClickHouse/pull/33521) ([Kruglov Pavel](https://github.com/Avogar)).
+* Don't allow writing into `S3` if the path contains globs. [#33142](https://github.com/ClickHouse/ClickHouse/pull/33142) ([Kruglov Pavel](https://github.com/Avogar)).
+* The `--echo` option was not used by `clickhouse-client` in batch mode with a single query. [#32843](https://github.com/ClickHouse/ClickHouse/pull/32843) ([N. Kolotov](https://github.com/nkolotov)).
+* Support the `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix surprisingly bad code in the SQL ordinary function `file`. Now it supports symlinks. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Update `modification_time` for data parts in `system.parts` after part movement [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)).
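For the `Decimal` vs `Float` comparison enabled above, a minimal sketch of what is now accepted (such mixed-type comparisons were previously rejected; beware of float rounding when testing equality):

```sql
-- Compare a Decimal value against a Float literal directly.
SELECT toDecimal32(1.10, 2) < 2.5 AS is_less;
```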
#### Build/Testing/Packaging Improvement
-* Add simple integrity check for ClickHouse binary. It allows to detect corruption due to faulty hardware (bit rot on storage media or bit flips in RAM). [#18811](https://github.com/ClickHouse/ClickHouse/pull/18811) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Change `OpenSSL` to `BoringSSL`. It allows to avoid issues with sanitizers. This fixes [#12490](https://github.com/ClickHouse/ClickHouse/issues/12490). This fixes [#17502](https://github.com/ClickHouse/ClickHouse/issues/17502). This fixes [#12952](https://github.com/ClickHouse/ClickHouse/issues/12952). [#18129](https://github.com/ClickHouse/ClickHouse/pull/18129) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Simplify `Sys/V` init script. It was not working on Ubuntu 12.04 or older. [#17428](https://github.com/ClickHouse/ClickHouse/pull/17428) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Multiple improvements in `./clickhouse install` script. [#17421](https://github.com/ClickHouse/ClickHouse/pull/17421) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Now ClickHouse can pretend to be a fake ZooKeeper. Currently, storage implementation is just stored in-memory hash-table, and server partially support ZooKeeper protocol. [#16877](https://github.com/ClickHouse/ClickHouse/pull/16877) ([alesapin](https://github.com/alesapin)).
-* Fix dead list watches removal for TestKeeperStorage (a mock for ZooKeeper). [#18065](https://github.com/ClickHouse/ClickHouse/pull/18065) ([alesapin](https://github.com/alesapin)).
-* Add `SYSTEM SUSPEND` command for fault injection. It can be used to faciliate failover tests. This closes [#15979](https://github.com/ClickHouse/ClickHouse/issues/15979). [#18850](https://github.com/ClickHouse/ClickHouse/pull/18850) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Generate build id when ClickHouse is linked with `lld`. It's appeared that `lld` does not generate it by default on my machine. Build id is used for crash reports and introspection. [#18808](https://github.com/ClickHouse/ClickHouse/pull/18808) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Fix shellcheck errors in style check. [#18566](https://github.com/ClickHouse/ClickHouse/pull/18566) ([Ilya Yatsishin](https://github.com/qoega)).
-* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)).
-* Fix codespell warnings. Split style checks into separate parts. Update style checks docker image. [#18463](https://github.com/ClickHouse/ClickHouse/pull/18463) ([Ilya Yatsishin](https://github.com/qoega)).
-* Automated check for leftovers of conflict markers in docs. [#18332](https://github.com/ClickHouse/ClickHouse/pull/18332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Enable Thread Fuzzer for stateless tests flaky check. [#18299](https://github.com/ClickHouse/ClickHouse/pull/18299) ([alesapin](https://github.com/alesapin)).
-* Do not use non thread-safe function `strerror`. [#18204](https://github.com/ClickHouse/ClickHouse/pull/18204) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Update `anchore/scan-action@main` workflow action (was moved from `master` to `main`). [#18192](https://github.com/ClickHouse/ClickHouse/pull/18192) ([Stig Bakken](https://github.com/stigsb)).
-* Now `clickhouse-test` does DROP/CREATE databases with a timeout. [#18098](https://github.com/ClickHouse/ClickHouse/pull/18098) ([alesapin](https://github.com/alesapin)).
-* Enable experimental support for Pytest framework for stateless tests. [#17902](https://github.com/ClickHouse/ClickHouse/pull/17902) ([Ivan](https://github.com/abyss7)).
-* Now we use the fresh docker daemon version in integration tests. [#17671](https://github.com/ClickHouse/ClickHouse/pull/17671) ([alesapin](https://github.com/alesapin)).
-* Send info about official build, memory, cpu and free disk space to Sentry if it is enabled. Sentry is opt-in feature to help ClickHouse developers. This closes [#17279](https://github.com/ClickHouse/ClickHouse/issues/17279). [#17543](https://github.com/ClickHouse/ClickHouse/pull/17543) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* There was an uninitialized variable in the code of clickhouse-copier. [#17363](https://github.com/ClickHouse/ClickHouse/pull/17363) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix [one MSan report](https://clickhouse-test-reports.s3.yandex.net/17309/065cd002578f2e8228f12a2744bd40c970065e0c/stress_test_(memory)/stderr.log) from [#17309](https://github.com/ClickHouse/ClickHouse/issues/17309). [#17344](https://github.com/ClickHouse/ClickHouse/pull/17344) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix for the issue with IPv6 in Arrow Flight library. See [the comments](https://github.com/ClickHouse/ClickHouse/pull/16243#issuecomment-720830294) for details. [#16664](https://github.com/ClickHouse/ClickHouse/pull/16664) ([Zhanna](https://github.com/FawnD2)).
-* Add a library that replaces some `libc` functions to traps that will terminate the process. [#16366](https://github.com/ClickHouse/ClickHouse/pull/16366) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Provide diagnostics in server logs in case of stack overflow, send error message to clickhouse-client. This closes [#14840](https://github.com/ClickHouse/ClickHouse/issues/14840). [#16346](https://github.com/ClickHouse/ClickHouse/pull/16346) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Now we can run almost all stateless functional tests in parallel. [#15236](https://github.com/ClickHouse/ClickHouse/pull/15236) ([alesapin](https://github.com/alesapin)).
-* Fix corruption in `librdkafka` snappy decompression (was a problem only for gcc10 builds, but official builds uses clang already, so at least recent official releases are not affected). [#18053](https://github.com/ClickHouse/ClickHouse/pull/18053) ([Azat Khuzhin](https://github.com/azat)).
-* If server was terminated by OOM killer, print message in log. [#13516](https://github.com/ClickHouse/ClickHouse/pull/13516) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* PODArray: Avoid call to memcpy with (nullptr, 0) arguments (Fix UBSan report). This fixes [#18525](https://github.com/ClickHouse/ClickHouse/issues/18525). [#18526](https://github.com/ClickHouse/ClickHouse/pull/18526) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Minor improvement for path concatenation of zookeeper paths inside DDLWorker. [#17767](https://github.com/ClickHouse/ClickHouse/pull/17767) ([Bharat Nallan](https://github.com/bharatnc)).
-* Allow to reload symbols from debug file. This PR also fixes a build-id issue. [#17637](https://github.com/ClickHouse/ClickHouse/pull/17637) ([Amos Bird](https://github.com/amosbird)).
+* Add packages, functional tests and Docker builds for the AArch64 (ARM) version of ClickHouse. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415)
+* Prepare ClickHouse to be built with musl-libc. It is not enabled by default. [#33134](https://github.com/ClickHouse/ClickHouse/pull/33134) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Make the installation script work on FreeBSD. This closes [#33384](https://github.com/ClickHouse/ClickHouse/issues/33384). [#33418](https://github.com/ClickHouse/ClickHouse/pull/33418) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add `actionlint` for GitHub Actions workflows and verify workflow files via `act --list` to check the correct workflow syntax. [#33612](https://github.com/ClickHouse/ClickHouse/pull/33612) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add more tests for the nullable primary key feature. Add more tests with different types and merge tree kinds, plus randomly generated data. [#33228](https://github.com/ClickHouse/ClickHouse/pull/33228) ([Amos Bird](https://github.com/amosbird)).
+* Add a simple tool to visualize flaky tests in a web browser. [#33185](https://github.com/ClickHouse/ClickHouse/pull/33185) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Enable hermetic build for shared builds. This is mainly for developers. [#32968](https://github.com/ClickHouse/ClickHouse/pull/32968) ([Amos Bird](https://github.com/amosbird)).
+* Update `libc++` and `libc++abi` to the latest. [#32484](https://github.com/ClickHouse/ClickHouse/pull/32484) ([Raúl Marín](https://github.com/Algunenano)).
+* Added integration test for external .NET client ([ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)). [#23230](https://github.com/ClickHouse/ClickHouse/pull/23230) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)).
+* Inject git information into the clickhouse binary file, so the source code revision can easily be obtained from the binary; see the query example below. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([taiyang-li](https://github.com/taiyang-li)).
+* Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). [#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-## [Changelog for 2020](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2020.md)
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Several fixes for format parsing. This is relevant if `clickhouse-server` is open for write access to an adversary: specifically crafted input data for the `Native` format may lead to reading uninitialized memory or a crash. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). Fixed an Apache Avro Union type index out-of-boundary issue in the Apache Avro binary format. [#33022](https://github.com/ClickHouse/ClickHouse/pull/33022) ([Harry Lee](https://github.com/HarryLeeIBM)). Fix null pointer dereference when deserializing `LowCardinality` data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)).
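The injected revision information should be visible through `system.build_options`. A hedged sketch; the exact option names (guessed here as `GIT%`-prefixed, e.g. `GIT_HASH`) are an assumption, not taken from the PR:

```sql
-- Hypothetical query: list git-related build options embedded in the binary.
SELECT name, value
FROM system.build_options
WHERE name LIKE 'GIT%';
```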
+* ClickHouse Keeper handler will correctly remove the operation once the response has been sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)).
+* Fix a potential off-by-one miscalculation of quotas: a quota could be reported as exceeded even though the limit had not been reached. This fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)).
+* Fixed CASTing from String to IPv4 or IPv6 and back; see the example below. Fixed the error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)) [#27914](https://github.com/ClickHouse/ClickHouse/pull/27914) ([Vasily Nemkov](https://github.com/Enmk)).
+* Fixed an exception like `Unknown aggregate function nothing` during execution on a remote server. This fixes [#16689](https://github.com/ClickHouse/ClickHouse/issues/16689). [#26074](https://github.com/ClickHouse/ClickHouse/pull/26074) ([hexiaoting](https://github.com/hexiaoting)).
+* Fix wrong database for JOIN without explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)).
+* Fix segfault in the Apache `Avro` format that appears after the second insert into a file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix segfault in the Apache `Arrow` format if the schema contains the `Dictionary` type. Closes [#33507](https://github.com/ClickHouse/ClickHouse/issues/33507). [#33529](https://github.com/ClickHouse/ClickHouse/pull/33529) ([Kruglov Pavel](https://github.com/Avogar)).
+* Out-of-band `offset` and `limit` settings may be applied incorrectly for views. Closes [#33289](https://github.com/ClickHouse/ClickHouse/issues/33289). [#33518](https://github.com/ClickHouse/ClickHouse/pull/33518) ([hexiaoting](https://github.com/hexiaoting)).
+* Fix an exception `Block structure mismatch` which may happen during insertion into a table with a default nested `LowCardinality` column. Fixes [#33028](https://github.com/ClickHouse/ClickHouse/issues/33028). [#33504](https://github.com/ClickHouse/ClickHouse/pull/33504) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix dictionary expressions for `range_hashed` range min and range max attributes when created using DDL. Closes [#30809](https://github.com/ClickHouse/ClickHouse/issues/30809). [#33478](https://github.com/ClickHouse/ClickHouse/pull/33478) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix possible use-after-free for INSERT into a Materialized View with concurrent DROP ([Azat Khuzhin](https://github.com/azat)).
+* Do not try to read past EOF (to work around a bug in the Linux kernel): the bug can be reproduced on kernels 3.14..5.9 and requires `index_granularity_bytes=0` (i.e. adaptive index granularity turned off). [#33372](https://github.com/ClickHouse/ClickHouse/pull/33372) ([Azat Khuzhin](https://github.com/azat)).
+* The commands `SYSTEM SUSPEND` and `SYSTEM ... THREAD FUZZER` missed access control. It is fixed. Author: Kevin Michel. [#33333](https://github.com/ClickHouse/ClickHouse/pull/33333) ([alexey-milovidov](https://github.com/alexey-milovidov)).
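A small sketch of the String/IP round-trip covered by the CAST fix above (the exact error message on a failed conversion is not reproduced here):

```sql
-- Round-trip a String through the IPv4 type and back, plus an IPv6 cast.
SELECT
    CAST('127.0.0.1', 'IPv4') AS ip4,
    CAST(CAST('127.0.0.1', 'IPv4'), 'String') AS round_trip,
    CAST('::1', 'IPv6') AS ip6;
```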
+* Fix `COMMENT` for dictionaries not appearing in `system.tables` and `system.dictionaries`. Allow modifying the comment for the `Dictionary` engine. Closes [#33251](https://github.com/ClickHouse/ClickHouse/issues/33251). [#33261](https://github.com/ClickHouse/ClickHouse/pull/33261) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add asynchronous inserts (with the setting `async_insert` enabled) to the query log; see the example below. Previously such queries didn't appear in the query log. [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix sending `WHERE 1 = 0` expressions for external database queries. Closes [#33152](https://github.com/ClickHouse/ClickHouse/issues/33152). [#33214](https://github.com/ClickHouse/ClickHouse/pull/33214) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix DDL validation for MaterializedPostgreSQL. Fix setting `materialized_postgresql_allow_automatic_update`. Closes [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33200](https://github.com/ClickHouse/ClickHouse/pull/33200) ([Kseniia Sumarokova](https://github.com/kssenii)). Make sure unused replication slots are always removed. Found in [#26952](https://github.com/ClickHouse/ClickHouse/issues/26952). [#33187](https://github.com/ClickHouse/ClickHouse/pull/33187) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix MaterializedPostgreSQL detach/attach (removing from / adding to replication) for tables with a non-default schema. Found in [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33179](https://github.com/ClickHouse/ClickHouse/pull/33179) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix DROP of a MaterializedPostgreSQL database. [#33468](https://github.com/ClickHouse/ClickHouse/pull/33468) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* The metric `StorageBufferBytes` sometimes was miscalculated. [#33159](https://github.com/ClickHouse/ClickHouse/pull/33159) ([xuyatian](https://github.com/xuyatian)).
+* Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from a `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix the `s3` table function reading an empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded to the server, and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)).
+* Fix wrong tuple output in the `CSV` format in case of a custom CSV delimiter. [#32981](https://github.com/ClickHouse/ClickHouse/pull/32981) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix HDFS URL check that didn't allow using an HA namenode address. The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix throwing an exception like `positional argument out of bounds` for non-positional arguments. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173#event-5789668239). [#32961](https://github.com/ClickHouse/ClickHouse/pull/32961) ([Kseniia Sumarokova](https://github.com/kssenii)).
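A usage sketch for the asynchronous-insert logging fix above. The table `t` and the log filter are assumptions for illustration; `SYSTEM FLUSH LOGS` forces the log table to be written before reading it:

```sql
-- Run an insert in asynchronous mode; after the fix it appears in the query log.
SET async_insert = 1;
INSERT INTO t VALUES (1);

SYSTEM FLUSH LOGS;
SELECT query, type
FROM system.query_log
WHERE query LIKE 'INSERT INTO t%'
ORDER BY event_time DESC
LIMIT 1;
```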
+* Fix UB in case of an unexpected EOF during filling a set from an HTTP query (i.e. if the client was interrupted in the middle, e.g. `timeout 0.15s curl -Ss -F 's=@t.csv;' 'http://127.0.0.1:8123/?s_structure=key+Int&query=SELECT+dummy+IN+s'` with a large enough `t.csv`). [#32955](https://github.com/ClickHouse/ClickHouse/pull/32955) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a regression in the `replaceRegexpAll` function. The function worked incorrectly when the matched substring was empty. This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix `ORC` format stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([kreuzerkrieg](https://github.com/kreuzerkrieg)).
+* `topKWeightedState` failed for some input types. [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([vdimir](https://github.com/vdimir)).
+* Fix exception `Single chunk is expected from view inner query (LOGICAL_ERROR)` in materialized views. Fixes [#31419](https://github.com/ClickHouse/ClickHouse/issues/31419). [#32862](https://github.com/ClickHouse/ClickHouse/pull/32862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix optimization with lazy seek for async reads from remote filesystems. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). [#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* The `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption; it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([tavplubix](https://github.com/tavplubix)).
+* Avoid reusing the scalar subquery cache when processing MV blocks. This fixes a bug when the scalar query references the source table, but it means that all scalar subqueries in the MV definition will be calculated for each block. [#32811](https://github.com/ClickHouse/ClickHouse/pull/32811) ([Raúl Marín](https://github.com/Algunenano)).
+* The server might fail to start if a database with the `MySQL` engine cannot connect to the MySQL server; it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([tavplubix](https://github.com/tavplubix)).
+* Fix crash when using the `fuzzBits` function. Closes [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)).
+* Fix error `Column is not under aggregate function` in case of an MV with `GROUP BY (list of columns)` (which is parsed as `GROUP BY tuple(...)`) over `Kafka`/`RabbitMQ`. Fixes [#32668](https://github.com/ClickHouse/ClickHouse/issues/32668) and [#32744](https://github.com/ClickHouse/ClickHouse/issues/32744). [#32751](https://github.com/ClickHouse/ClickHouse/pull/32751) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix `ALTER TABLE ... MATERIALIZE TTL` query with `TTL ... DELETE WHERE ...` and `TTL ... GROUP BY ...` modes. [#32695](https://github.com/ClickHouse/ClickHouse/pull/32695) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix `optimize_read_in_order` optimization in the case when the table engine is `Distributed` or `Merge` and its underlying `MergeTree` tables have a monotonic function in the prefix of the sorting key. [#32670](https://github.com/ClickHouse/ClickHouse/pull/32670) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix `LOGICAL_ERROR` exception when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)).
+* Inserting into S3 with multipart upload to Google Cloud Storage might trigger an abort. [#32504](https://github.com/ClickHouse/ClickHouse/issues/32504). [#32649](https://github.com/ClickHouse/ClickHouse/pull/32649) ([vdimir](https://github.com/vdimir)).
+* Fix possible exception at `RabbitMQ` storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)).
+* Fix async inserts with the formats `CustomSeparated`, `Template`, `Regexp`, `MsgPack` and `JSONAsString`. Previously, async inserts with these formats didn't read any data. [#32530](https://github.com/ClickHouse/ClickHouse/pull/32530) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix the `groupBitmapAnd` function on a distributed table. [#32529](https://github.com/ClickHouse/ClickHouse/pull/32529) ([minhthucdao](https://github.com/dmthuc)).
+* Fix crash in JOIN found by fuzzer. Closes [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([vdimir](https://github.com/vdimir)).
+* Proper handling of the case with Apache Arrow column duplication. [#32507](https://github.com/ClickHouse/ClickHouse/pull/32507) ([Dmitriy Mokhnatkin](https://github.com/DMokhnatkin)).
+* Fix issue with ambiguous query formatting in distributed queries that led to errors when some table columns were named `ALL` or `DISTINCT`. This closes [#32391](https://github.com/ClickHouse/ClickHouse/issues/32391). [#32490](https://github.com/ClickHouse/ClickHouse/pull/32490) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix failures in queries that are trying to use skipping indices which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix broken SELECT queries when there are more than two row policies on the same column, starting from the second query in the same session. [#31606](https://github.com/ClickHouse/ClickHouse/issues/31606). [#32291](https://github.com/ClickHouse/ClickHouse/pull/32291) ([SuperDJY](https://github.com/cmsxbc)).
+* Fix fractional Unix timestamp conversion to `DateTime64`: the fractional part was reversed for negative Unix timestamps (before 1970-01-01); see the example below. [#32240](https://github.com/ClickHouse/ClickHouse/pull/32240) ([Ben](https://github.com/benbiti)).
+* Some entries of the replication queue might hang for `temporary_directories_lifetime` (1 day by default) with a `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([tavplubix](https://github.com/tavplubix)).
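To illustrate the negative-timestamp fix above: a fractional Unix timestamp before 1970-01-01 should now keep its fractional part oriented correctly. The printed value in the comment is an expectation for illustration, not output taken from the PR:

```sql
-- -1.25 seconds relative to the epoch, with two fractional digits.
SELECT toDateTime64(-1.25, 2, 'UTC') AS t;
-- expected: 1969-12-31 23:59:58.75
```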
+* Fix parsing of the `APPLY lambda` column transformer, which could lead to a client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix `base64Encode` adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of a window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix hang-up with the command `DROP TABLE system.query_log sync`. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)).
+
+
+## [Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)
diff --git a/CMakeLists.txt b/CMakeLists.txt index fdc9cfcd303..72c00bcb622 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -511,6 +511,7 @@ include (cmake/find/h3.cmake) include (cmake/find/libxml2.cmake) include (cmake/find/brotli.cmake) include (cmake/find/protobuf.cmake) +include (cmake/find/thrift.cmake) include (cmake/find/grpc.cmake) include (cmake/find/pdqsort.cmake) include (cmake/find/miniselect.cmake) @@ -520,7 +521,7 @@ include (cmake/find/curl.cmake) include (cmake/find/s3.cmake) include (cmake/find/blob_storage.cmake) include (cmake/find/base64.cmake) -include (cmake/find/parquet.cmake) +include (cmake/find/parquet.cmake) # uses protobuf and thrift include (cmake/find/simdjson.cmake) include (cmake/find/fast_float.cmake) include (cmake/find/rapidjson.cmake) @@ -548,6 +549,7 @@ include (cmake/find/cassandra.cmake) include (cmake/find/sentry.cmake) include (cmake/find/datasketches.cmake) include (cmake/find/libprotobuf-mutator.cmake) +include (cmake/find/hive-metastore.cmake) set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "") find_contrib_lib(cityhash)
diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index cffffcc213f..ec146fd5821 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -125,11 +125,11 @@ public: template <size_t Bits, typename Signed, size_t Bits2, typename Signed2> struct common_type<wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>> { - using type = std::conditional_t < Bits == Bits2, - wide::integer< - Bits, - std::conditional_t<(std::is_same_v<Signed, signed> && std::is_same_v<Signed2, signed>), signed, unsigned>>, - std::conditional_t<Bits2 < Bits, wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>>>; + using type = std::conditional_t<Bits == Bits2, wide::integer<Bits, std::conditional_t<(std::is_same_v<Signed, signed> && std::is_same_v<Signed2, signed>), signed, unsigned>>, + std::conditional_t<Bits2 < Bits, wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>>>; }; template
diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 8e7c061088a..39aa6c88c4e 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54458) -SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 13) +SET(VERSION_REVISION 54459) +SET(VERSION_MAJOR 22) +SET(VERSION_MINOR 2) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 4cc45c1e15912ee300bca7cc8b8da2b888a70e2a) -SET(VERSION_DESCRIBE v21.13.1.1-prestable) -SET(VERSION_STRING 21.13.1.1) +SET(VERSION_GITHASH dfe64a2789bbf51046bb6b5476f874f7b59d124c) +SET(VERSION_DESCRIBE v22.2.1.1-prestable) +SET(VERSION_STRING 22.2.1.1) # end of autochange diff --git a/cmake/find/hive-metastore.cmake b/cmake/find/hive-metastore.cmake new file mode 100644 index 00000000000..bc283cf8bd2 --- /dev/null +++ b/cmake/find/hive-metastore.cmake @@ -0,0 +1,26 @@ +option(ENABLE_HIVE "Enable Hive" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_HIVE) + message("Hive disabled") + return() +endif() + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hive-metastore") + message(WARNING "submodule contrib/hive-metastore is missing. to fix try run: \n git submodule update --init") + set(USE_HIVE 0) +elseif (NOT USE_THRIFT) + message(WARNING "Thrift is not found, which is needed by Hive") + set(USE_HIVE 0) +elseif (NOT USE_HDFS) + message(WARNING "HDFS is not found, which is needed by Hive") + set(USE_HIVE 0) +elseif (NOT USE_ORC OR NOT USE_ARROW OR NOT USE_PARQUET) + message(WARNING "ORC/Arrow/Parquet is not found, which are needed by Hive") + set(USE_HIVE 0) +else() + set(USE_HIVE 1) + set(HIVE_METASTORE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore) + set(HIVE_METASTORE_LIBRARY hivemetastore) +endif() + +message (STATUS "Using_Hive=${USE_HIVE}: ${HIVE_METASTORE_INCLUDE_DIR} : ${HIVE_METASTORE_LIBRARY}") diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index 89615710d48..48c2bb7babb 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -34,7 +34,6 @@ endif() if(NOT USE_INTERNAL_PARQUET_LIBRARY) find_package(Arrow) find_package(Parquet) - find_library(THRIFT_LIBRARY thrift) find_library(UTF8_PROC_LIBRARY utf8proc) find_package(BZip2) @@ -145,12 +144,10 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O set(FLATBUFFERS_LIBRARY flatbuffers) set(ARROW_LIBRARY arrow_static) set(PARQUET_LIBRARY parquet_static) - set(THRIFT_LIBRARY thrift_static) else() set(FLATBUFFERS_LIBRARY flatbuffers_shared) set(ARROW_LIBRARY arrow_shared) set(PARQUET_LIBRARY parquet_shared) - set(THRIFT_LIBRARY thrift) endif() set(USE_PARQUET 1) diff --git a/cmake/find/thrift.cmake b/cmake/find/thrift.cmake new file mode 100644 index 00000000000..08eeb60915e --- /dev/null +++ b/cmake/find/thrift.cmake @@ -0,0 +1,34 @@ +option(ENABLE_THRIFT "Enable Thrift" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_THRIFT) + message (STATUS "thrift disabled") + set(USE_INTERNAL_THRIFT_LIBRARY 0) + return() +endif() + +option(USE_INTERNAL_THRIFT_LIBRARY "Set to FALSE to use system thrift library instead of bundled" ON) +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/thrift") + if (USE_INTERNAL_THRIFT_LIBRARY) + message (WARNING "submodule contrib/thrift is missing. 
to fix try run: \n git submodule update --init --recursive") + set(USE_INTERNAL_THRIFT_LIBRARY 0) + endif () +endif() + +if (USE_INTERNAL_THRIFT_LIBRARY) + if (MAKE_STATIC_LIBRARIES) + set(THRIFT_LIBRARY thrift_static) + else() + set(THRIFT_LIBRARY thrift) + endif() + set (THRIFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src") + set(USE_THRIFT 1) +else() + find_library(THRIFT_LIBRARY thrift) + if (NOT THRIFT_LIBRARY) + set(USE_THRIFT 0) + else() + set(USE_THRIFT 1) + endif() +endif () + +message (STATUS "Using thrift=${USE_THRIFT}: ${THRIFT_INCLUDE_DIR} : ${THRIFT_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9eb3b145c85..17954159b3a 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -150,7 +150,6 @@ if (USE_INTERNAL_PARQUET_LIBRARY) # The library is large - avoid bloat. target_compile_options (${ARROW_LIBRARY} PRIVATE -g0) - target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0) target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0) endif() @@ -206,6 +205,10 @@ if (USE_INTERNAL_PROTOBUF_LIBRARY) add_subdirectory(protobuf-cmake) endif () +if (USE_INTERNAL_THRIFT_LIBRARY) + add_subdirectory(thrift-cmake) +endif () + if (USE_INTERNAL_HDFS3_LIBRARY) add_subdirectory(libhdfs3-cmake) endif () @@ -299,6 +302,10 @@ if (USE_S2_GEOMETRY) add_subdirectory(s2geometry-cmake) endif() +if (USE_HIVE) + add_subdirectory (hive-metastore-cmake) +endif() + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/arrow b/contrib/arrow index aa9a7a698e3..1d9cc51daa4 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e +Subproject commit 1d9cc51daa4e7e9fc6926320ef73759818bd736e diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index e01b546310f..d4882a57124 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -17,57 +17,8 @@ else() set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0") endif() -# === thrift - -set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp") -# contrib/thrift/lib/cpp/CMakeLists.txt -set(thriftcpp_SOURCES - "${LIBRARY_DIR}/src/thrift/TApplicationException.cpp" - "${LIBRARY_DIR}/src/thrift/TOutput.cpp" - "${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp" - "${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp" - "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h" - "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp" - "${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp" - "${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp" - "${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp" - "${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp" - "${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp" - "${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp" - "${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp" - "${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp" - "${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp" - 
"${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp" - "${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp" - "${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp" - "${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp" - "${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp" - "${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp" - "${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp" - "${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp" - "${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp" - ) -set(thriftcpp_threads_SOURCES - "${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp" - "${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp" - "${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp" - "${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp" - ) -add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES}) -set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641 -target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src") -target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only) - # === orc - set(ORC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/orc/c++") set(ORC_INCLUDE_DIR "${ORC_SOURCE_DIR}/include") set(ORC_SOURCE_SRC_DIR "${ORC_SOURCE_DIR}/src") @@ -463,49 +414,6 @@ set(PARQUET_SRCS #list(TRANSFORM PARQUET_SRCS PREPEND "${LIBRARY_DIR}/") # cmake 3.12 add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS}) target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src" PRIVATE ${OPENSSL_INCLUDE_DIR}) - -set (HAVE_ARPA_INET_H 1) -set (HAVE_FCNTL_H 1) -set (HAVE_GETOPT_H 1) -set (HAVE_INTTYPES_H 1) -set (HAVE_NETDB_H 1) -set (HAVE_NETINET_IN_H 1) -set (HAVE_SIGNAL_H 1) -set (HAVE_STDINT_H 1) -set (HAVE_UNISTD_H 1) -set (HAVE_PTHREAD_H 1) -set (HAVE_SYS_IOCTL_H 1) -set (HAVE_SYS_PARAM_H 1) -set (HAVE_SYS_RESOURCE_H 1) -set (HAVE_SYS_SOCKET_H 1) -set (HAVE_SYS_STAT_H 1) -set (HAVE_SYS_TIME_H 1) -set (HAVE_SYS_UN_H 1) -set (HAVE_POLL_H 1) -set (HAVE_SYS_POLL_H 1) -set (HAVE_SYS_SELECT_H 1) -set (HAVE_SCHED_H 1) -set (HAVE_STRING_H 1) -set (HAVE_STRINGS_H 1) -set (HAVE_GETHOSTBYNAME 1) -set (HAVE_STRERROR_R 1) -set (HAVE_SCHED_GET_PRIORITY_MAX 1) -set (HAVE_SCHED_GET_PRIORITY_MIN 1) - -if (OS_LINUX AND NOT USE_MUSL) - set (STRERROR_R_CHAR_P 1) -endif () - -#set(PACKAGE ${PACKAGE_NAME}) -#set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") -#set(VERSION ${thrift_VERSION}) - -# generate a config.h file -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/thrift/config.h") - -include_directories("${CMAKE_CURRENT_BINARY_DIR}") - - target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex ${OPENSSL_LIBRARIES}) if (SANITIZE STREQUAL "undefined") diff --git a/contrib/hive-metastore b/contrib/hive-metastore new file mode 160000 index 00000000000..809a77d435c --- /dev/null +++ b/contrib/hive-metastore @@ -0,0 +1 @@ +Subproject commit 809a77d435ce218d9b000733f19489c606fc567b diff --git a/contrib/hive-metastore-cmake/CMakeLists.txt b/contrib/hive-metastore-cmake/CMakeLists.txt new file mode 100644 index 
00000000000..c92405fa4e8 --- /dev/null +++ b/contrib/hive-metastore-cmake/CMakeLists.txt @@ -0,0 +1,9 @@ +set (SRCS + ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_constants.cpp + ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_types.cpp + ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/ThriftHiveMetastore.cpp +) + +add_library(${HIVE_METASTORE_LIBRARY} ${SRCS}) +target_link_libraries(${HIVE_METASTORE_LIBRARY} PUBLIC ${THRIFT_LIBRARY}) +target_include_directories(${HIVE_METASTORE_LIBRARY} SYSTEM PUBLIC ${HIVE_METASTORE_INCLUDE_DIR}) diff --git a/contrib/thrift-cmake/CMakeLists.txt b/contrib/thrift-cmake/CMakeLists.txt new file mode 100644 index 00000000000..088dd0a969b --- /dev/null +++ b/contrib/thrift-cmake/CMakeLists.txt @@ -0,0 +1,87 @@ +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp") +set(thriftcpp_SOURCES + "${LIBRARY_DIR}/src/thrift/TApplicationException.cpp" + "${LIBRARY_DIR}/src/thrift/TOutput.cpp" + "${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp" + "${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp" + "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h" + "${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp" + "${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp" + "${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp" + "${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp" + "${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp" + "${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp" + "${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp" + "${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp" + "${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp" + "${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp" + "${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp" + "${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp" + "${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp" + "${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp" + "${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp" + "${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp" + "${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp" + "${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp" + "${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp" + ) +set(thriftcpp_threads_SOURCES + "${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp" + "${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp" + "${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp" + "${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp" + ) + +include("${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake") # makes config.h + +set (HAVE_ARPA_INET_H 1) +set (HAVE_FCNTL_H 1) +set (HAVE_GETOPT_H 1) +set (HAVE_INTTYPES_H 1) +set (HAVE_NETDB_H 1) +set (HAVE_NETINET_IN_H 1) +set (HAVE_SIGNAL_H 1) +set (HAVE_STDINT_H 1) +set (HAVE_UNISTD_H 1) +set (HAVE_PTHREAD_H 1) +set (HAVE_SYS_IOCTL_H 1) +set (HAVE_SYS_PARAM_H 1) +set (HAVE_SYS_RESOURCE_H 1) +set (HAVE_SYS_SOCKET_H 1) +set (HAVE_SYS_STAT_H 1) +set (HAVE_SYS_TIME_H 1) +set (HAVE_SYS_UN_H 1) +set (HAVE_POLL_H 1) +set (HAVE_SYS_POLL_H 1) +set (HAVE_SYS_SELECT_H 1) +set (HAVE_SCHED_H 1) +set 
(HAVE_STRING_H 1) +set (HAVE_STRINGS_H 1) +set (HAVE_GETHOSTBYNAME 1) +set (HAVE_STRERROR_R 1) +set (HAVE_SCHED_GET_PRIORITY_MAX 1) +set (HAVE_SCHED_GET_PRIORITY_MIN 1) + +if (OS_LINUX AND NOT USE_MUSL) + set (STRERROR_R_CHAR_P 1) +endif () + +#set(PACKAGE ${PACKAGE_NAME}) +#set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") +#set(VERSION ${thrift_VERSION}) + +# generate a config.h file +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build/cmake/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/thrift/config.h") + +include_directories("${CMAKE_CURRENT_BINARY_DIR}") + +add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES}) +target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC "${THRIFT_INCLUDE_DIR}" ${CMAKE_CURRENT_BINARY_DIR}) +target_link_libraries (${THRIFT_LIBRARY} PUBLIC boost::headers_only) diff --git a/contrib/thrift-cmake/build/cmake/config.h.in b/contrib/thrift-cmake/build/cmake/config.h.in new file mode 120000 index 00000000000..eb28c214748 --- /dev/null +++ b/contrib/thrift-cmake/build/cmake/config.h.in @@ -0,0 +1 @@ +../../../thrift/build/cmake/config.h.in \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index 3c1be00d664..43b46f561c8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -clickhouse (21.13.1.1) unstable; urgency=low +clickhouse (22.1.1.1) unstable; urgency=low * Modified source code diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 9ce06939a85..1c185daec75 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.13.1.* +ARG version=22.1.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/docs/builder/Dockerfile b/docker/docs/builder/Dockerfile index 50e3fadf9ac..906312a19a2 100644 --- a/docker/docs/builder/Dockerfile +++ b/docker/docs/builder/Dockerfile @@ -1,4 +1,5 @@ -# docker build -t clickhouse/docs-build . +# rebuild in #33610 +# docker build -t clickhouse/docs-builder . FROM ubuntu:20.04 # ARG for quick switch to a given ubuntu mirror @@ -9,8 +10,6 @@ ENV LANG=C.UTF-8 RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ - python3-setuptools \ - virtualenv \ wget \ bash \ python \ diff --git a/docker/docs/check/Dockerfile b/docker/docs/check/Dockerfile index 55647df5c3e..174be123eed 100644 --- a/docker/docs/check/Dockerfile +++ b/docker/docs/check/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/docs-check . -FROM clickhouse/docs-builder +ARG FROM_TAG=latest +FROM clickhouse/docs-builder:$FROM_TAG COPY run.sh / diff --git a/docker/docs/check/run.sh b/docker/docs/check/run.sh index f70f82aeb4c..016881be1bc 100644 --- a/docker/docs/check/run.sh +++ b/docker/docs/check/run.sh @@ -2,6 +2,7 @@ set -euo pipefail cd $REPO_PATH/docs/tools +rm -rf venv mkdir venv virtualenv -p $(which python3) venv source venv/bin/activate diff --git a/docker/docs/release/Dockerfile b/docker/docs/release/Dockerfile index 63765180a4c..3c661c1d3fd 100644 --- a/docker/docs/release/Dockerfile +++ b/docker/docs/release/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/docs-release . 
-FROM clickhouse/docs-builder +ARG FROM_TAG=latest +FROM clickhouse/docs-builder:$FROM_TAG COPY run.sh /
diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 8f886ea357d..e3e2e689b17 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -1,4 +1,5 @@ -# docker build -t clickhouse/binary-builder . +# rebuild in #33610 +# docker build -t clickhouse/binary-builder . FROM ubuntu:20.04 # ARG for quick switch to a given ubuntu mirror
diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 89c34846efa..76a5f1d91c0 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -1,3 +1,4 @@ +# rebuild in #33610 # docker build -t clickhouse/deb-builder . FROM ubuntu:20.04 @@ -28,12 +29,14 @@ RUN apt-get update \ software-properties-common \ --yes --no-install-recommends +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. # Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ - && chmod +x dpkg-deb \ - && cp dpkg-deb /usr/bin +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} RUN apt-get update \ && apt-get install \
diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index bfdf65cd56c..5b10d1fc490 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -5,8 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.13.1.* -ARG gosu_ver=1.10 +ARG version=22.1.1.* # set non-empty deb_location_url url to create a docker image # from debs created by CI build, for example: @@ -30,6 +29,23 @@ ARG DEBIAN_FRONTEND=noninteractive # installed to prevent picking those uid / gid by some unrelated software. # The same uid / gid (101) is used both for alpine and ubuntu.
+# To drop privileges, we need the 'su' command, which simply changes uid and gid.
+# In fact, the 'su' command from Linux is not so simple, due to an inherent vulnerability in Linux:
+# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
+# To mitigate this drawback of Linux, the 'su' command creates its own pseudo-terminal
+# and forwards commands. Due to some ridiculous circumstances, it does not work in Docker (or it does)
+# and for these reasons people are using alternatives to the 'su' command in Docker,
+# that don't mess with the terminal, don't care about closing the opened files, etc...
+# but are only safe for dropping privileges inside Docker.
+# The question is what implementation of the 'su' command to use.
+# It should be a simple script doing just about two syscalls.
+# Some people tend to use the 'gosu' tool that is written in Go.
+# It is not used for several reasons:
+# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
+# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
+ +COPY su-exec.c /su-exec.c + RUN groupadd -r clickhouse --gid=101 \ && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ && apt-get update \ @@ -68,8 +84,12 @@ RUN groupadd -r clickhouse --gid=101 \ clickhouse-client=$version \ clickhouse-server=$version ; \ fi \ - && wget --progress=bar:force:noscroll "https://github.com/tianon/gosu/releases/download/$gosu_ver/gosu-$(dpkg --print-architecture)" -O /bin/gosu \ - && chmod +x /bin/gosu \ + && apt-get install -y --no-install-recommends tcc libc-dev && \ + tcc /su-exec.c -o /bin/su-exec && \ + chown root:root /bin/su-exec && \ + chmod 0755 /bin/su-exec && \ + rm /su-exec.c && \ + apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ && clickhouse-local -q 'SELECT * FROM system.build_options' \ && rm -rf \ /var/lib/apt/lists/* \ @@ -100,4 +120,3 @@ VOLUME /var/lib/clickhouse ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml ENTRYPOINT ["/entrypoint.sh"] - diff --git a/docker/server/local.Dockerfile b/docker/server/local.Dockerfile index 390f840513e..0d86c9ce45a 100644 --- a/docker/server/local.Dockerfile +++ b/docker/server/local.Dockerfile @@ -6,7 +6,7 @@ # Middle steps are performed by the bash script. FROM ubuntu:18.04 as clickhouse-server-base -ARG gosu_ver=1.10 +ARG gosu_ver=1.14 VOLUME /packages/ diff --git a/docker/server/su-exec.c b/docker/server/su-exec.c new file mode 100644 index 00000000000..a375e704f55 --- /dev/null +++ b/docker/server/su-exec.c @@ -0,0 +1,138 @@ +/* + +https://github.com/ncopa/su-exec +The file is copy-pasted verbatim to avoid supply chain attacks. + +The MIT License (MIT) + +Copyright (c) 2015 ncopa + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ + +/* set user and group id and exec */ + +#include <sys/types.h> + +#include <err.h> +#include <errno.h> +#include <grp.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +static char *argv0; + +static void usage(int exitcode) +{ + printf("Usage: %s user-spec command [args]\n", argv0); + exit(exitcode); +} + +int main(int argc, char *argv[]) +{ + char *user, *group, **cmdargv; + char *end; + + uid_t uid = getuid(); + gid_t gid = getgid(); + + argv0 = argv[0]; + if (argc < 3) + usage(0); + + user = argv[1]; + group = strchr(user, ':'); + if (group) + *group++ = '\0'; + + cmdargv = &argv[2]; + + struct passwd *pw = NULL; + if (user[0] != '\0') { + uid_t nuid = strtol(user, &end, 10); + if (*end == '\0') + uid = nuid; + else { + pw = getpwnam(user); + if (pw == NULL) + err(1, "getpwnam(%s)", user); + } + } + if (pw == NULL) { + pw = getpwuid(uid); + } + if (pw != NULL) { + uid = pw->pw_uid; + gid = pw->pw_gid; + } + + setenv("HOME", pw != NULL ? pw->pw_dir : "/", 1); + + if (group && group[0] != '\0') { + /* group was specified, ignore grouplist for setgroups later */ + pw = NULL; + + gid_t ngid = strtol(group, &end, 10); + if (*end == '\0') + gid = ngid; + else { + struct group *gr = getgrnam(group); + if (gr == NULL) + err(1, "getgrnam(%s)", group); + gid = gr->gr_gid; + } + } + + if (pw == NULL) { + if (setgroups(1, &gid) < 0) + err(1, "setgroups(%i)", gid); + } else { + int ngroups = 0; + gid_t *glist = NULL; + + while (1) { + int r = getgrouplist(pw->pw_name, gid, glist, &ngroups); + + if (r >= 0) { + if (setgroups(ngroups, glist) < 0) + err(1, "setgroups"); + break; + } + + glist = realloc(glist, ngroups * sizeof(gid_t)); + if (glist == NULL) + err(1, "malloc"); + } + } + + if (setgid(gid) < 0) + err(1, "setgid(%i)", gid); + + if (setuid(uid) < 0) + err(1, "setuid(%i)", uid); + + execvp(cmdargv[0], cmdargv); + err(1, "%s", cmdargv[0]); + + return 1; +}
diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index c24c013646f..66d1afb309b 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.13.1.* +ARG version=22.1.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \
diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index a661f8875a2..6beab2e5bb7 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/test-base . -FROM clickhouse/test-util +ARG FROM_TAG=latest +FROM clickhouse/test-util:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" @@ -28,12 +30,14 @@ RUN apt-get update \ software-properties-common \ --yes --no-install-recommends +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ - && chmod +x dpkg-deb \ - && cp dpkg-deb /usr/bin +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} RUN apt-get update \ && apt-get install \ diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index d1059b3dacc..97f3f54ad98 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -1,12 +1,14 @@ +# rebuild in #33610 # docker build --network=host -t clickhouse/codebrowser . # docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser -FROM clickhouse/binary-builder +ARG FROM_TAG=latest +FROM clickhouse/binary-builder:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev +RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev libmlir-13-dev # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 6fa5b0aa9db..46b74d89e13 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/fasttest . -FROM clickhouse/test-util +ARG FROM_TAG=latest +FROM clickhouse/test-util:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" @@ -28,12 +30,14 @@ RUN apt-get update \ software-properties-common \ --yes --no-install-recommends +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. # Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ - && chmod +x dpkg-deb \ - && cp dpkg-deb /usr/bin +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} RUN apt-get update \ && apt-get install \ diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index c602cba50aa..eb4b09c173f 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/fuzzer . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 89c2b19236e..91b26735fe5 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -1,44 +1,57 @@ +# rebuild in #33610 # docker build -t clickhouse/integration-test . 
-FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG SHELL ["/bin/bash", "-c"] RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get -y install \ - tzdata \ - python3 \ - libicu-dev \ bsdutils \ + curl \ + default-jre \ + g++ \ gdb \ - unixodbc \ - odbcinst \ + iproute2 \ + krb5-user \ + libicu-dev \ libsqlite3-dev \ libsqliteodbc \ - odbc-postgresql \ - sqlite3 \ - curl \ - tar \ - lz4 \ - krb5-user \ - iproute2 \ lsof \ - g++ \ - default-jre + lz4 \ + odbc-postgresql \ + odbcinst \ + python3 \ + rpm2cpio \ + sqlite3 \ + tar \ + tzdata \ + unixodbc \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ -RUN apt-get clean +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH -# Install MySQL ODBC driver -RUN curl 'https://downloads.mysql.com/archives/get/p/10/file/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --location --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so +# Install MySQL ODBC driver from RHEL rpm +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && cd /tmp \ + && curl -o mysql-odbc.rpm "https://cdn.mysql.com/Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \ + && rpm2archive mysql-odbc.rpm \ + && tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \ + && LINK_DIR=$(dpkg -L libodbc1 | grep '^/usr/lib/.*-linux-gnu/odbc$') \ + && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR" \ + && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR"/libmyodbc.so # Unfortunately this is required for a single test for conversion data from zookeeper to clickhouse-keeper. # ZooKeeper is not started by default, but consumes some space in containers. 
# 777 perms used to allow anybody to start/stop ZooKeeper
 ENV ZOOKEEPER_VERSION='3.6.3'
-RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
+RUN curl -O "https://dlcdn.apache.org/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
 RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
 RUN echo $'tickTime=2500 \n\
 tickTime=2500 \n\
diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile
new file mode 100644
index 00000000000..fa6e4bf6313
--- /dev/null
+++ b/docker/test/integration/hive_server/Dockerfile
@@ -0,0 +1,47 @@
+FROM ubuntu:20.04
+MAINTAINER lgbo-ustc
+
+RUN apt-get update
+RUN apt-get install -y wget openjdk-8-jre
+
+RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \
+    tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz
+RUN wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \
+    tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz
+RUN apt install -y vim
+
+RUN apt install -y openssh-server openssh-client
+
+RUN apt install -y mysql-server
+
+RUN mkdir -p /root/.ssh && \
+    ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa && \
+    cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys && \
+    cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key && \
+    cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub
+
+RUN wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz &&\
+    tar -xf mysql-connector-java-8.0.27.tar.gz && \
+    mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ && \
+    rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27
+
+RUN apt install -y iputils-ping net-tools
+
+ENV JAVA_HOME=/usr
+ENV HADOOP_HOME=/hadoop-3.1.0
+ENV HDFS_NAMENODE_USER=root
+ENV HDFS_DATANODE_USER=root HDFS_SECONDARYNAMENODE_USER=root YARN_RESOURCEMANAGER_USER=root YARN_NODEMANAGER_USER=root HDFS_DATANODE_SECURE_USER=hdfs
+COPY hdfs-site.xml /hadoop-3.1.0/etc/hadoop
+COPY mapred-site.xml /hadoop-3.1.0/etc/hadoop
+COPY yarn-site.xml /hadoop-3.1.0/etc/hadoop
+COPY hadoop-env.sh /hadoop-3.1.0/etc/hadoop/
+#COPY core-site.xml /hadoop-3.1.0/etc/hadoop
+COPY core-site.xml.template /hadoop-3.1.0/etc/hadoop
+COPY hive-site.xml /apache-hive-2.3.9-bin/conf
+COPY prepare_hive_data.sh /
+COPY demo_data.txt /
+
+ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH
+
+COPY start.sh /
+
diff --git a/docker/test/integration/hive_server/core-site.xml.template b/docker/test/integration/hive_server/core-site.xml.template
new file mode 100644
index 00000000000..232338e0443
--- /dev/null
+++ b/docker/test/integration/hive_server/core-site.xml.template
@@ -0,0 +1,14 @@
+<configuration>
+    <property>
+        <name>fs.defaultFS</name>
+        <value>hdfs://HOSTNAME:9000</value>
+    </property>
+    <property>
+        <name>hadoop.proxyuser.root.hosts</name>
+        <value>*</value>
+    </property>
+    <property>
+        <name>hadoop.proxyuser.root.groups</name>
+        <value>*</value>
+    </property>
+</configuration>
diff --git a/docker/test/integration/hive_server/demo_data.txt b/docker/test/integration/hive_server/demo_data.txt
new file mode 100644
index 00000000000..dbe7c4bd990
--- /dev/null
+++ b/docker/test/integration/hive_server/demo_data.txt
@@ -0,0 +1,6 @@
+abc,1,2021-11-16
+abd,15,2021-11-05
+aaa,22,2021-11-16
+dda,0,2021-11-01
+dfb,11,2021-11-05
+jhn,89,2021-11-11
diff --git
a/docker/test/integration/hive_server/hadoop-env.sh b/docker/test/integration/hive_server/hadoop-env.sh new file mode 100644 index 00000000000..84fcde490c3 --- /dev/null +++ b/docker/test/integration/hive_server/hadoop-env.sh @@ -0,0 +1,422 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set Hadoop-specific environment variables here. + +## +## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS. +## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS. THEREFORE, +## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE +## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh. +## +## Precedence rules: +## +## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults +## +## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults +## + +# Many of the options here are built from the perspective that users +# may want to provide OVERWRITING values on the command line. +# For example: +# +JAVA_HOME=/usr/ +# +# Therefore, the vast majority (BUT NOT ALL!) of these defaults +# are configured for substitution and not append. If append +# is preferable, modify this file accordingly. + +### +# Generic settings for HADOOP +### + +# Technically, the only required environment variable is JAVA_HOME. +# All others are optional. However, the defaults are probably not +# preferred. Many sites configure these options outside of Hadoop, +# such as in /etc/profile.d + +# The java implementation to use. By default, this environment +# variable is REQUIRED on ALL platforms except OS X! +# export JAVA_HOME= + +# Location of Hadoop. By default, Hadoop will attempt to determine +# this location based upon its execution path. +# export HADOOP_HOME= + +# Location of Hadoop's configuration information. i.e., where this +# file is living. If this is not defined, Hadoop will attempt to +# locate it based upon its execution path. +# +# NOTE: It is recommend that this variable not be set here but in +# /etc/profile.d or equivalent. Some options (such as +# --config) may react strangely otherwise. +# +# export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop + +# The maximum amount of heap to use (Java -Xmx). If no unit +# is provided, it will be converted to MB. Daemons will +# prefer any Xmx setting in their respective _OPT variable. +# There is no default; the JVM will autoscale based upon machine +# memory size. +# export HADOOP_HEAPSIZE_MAX= + +# The minimum amount of heap to use (Java -Xms). If no unit +# is provided, it will be converted to MB. Daemons will +# prefer any Xms setting in their respective _OPT variable. +# There is no default; the JVM will autoscale based upon machine +# memory size. +# export HADOOP_HEAPSIZE_MIN= + +# Enable extra debugging of Hadoop's JAAS binding, used to set up +# Kerberos security. 
+# export HADOOP_JAAS_DEBUG=true + +# Extra Java runtime options for all Hadoop commands. We don't support +# IPv6 yet/still, so by default the preference is set to IPv4. +# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true" +# For Kerberos debugging, an extended option set logs more invormation +# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug" + +# Some parts of the shell code may do special things dependent upon +# the operating system. We have to set this here. See the next +# section as to why.... +export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)} + + +# Under certain conditions, Java on OS X will throw SCDynamicStore errors +# in the system logs. +# See HADOOP-8719 for more information. If one needs Kerberos +# support on OS X, one will want to change/remove this extra bit. +case ${HADOOP_OS_TYPE} in + Darwin*) + export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= " + export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= " + export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= " + ;; +esac + +# Extra Java runtime options for some Hadoop commands +# and clients (i.e., hdfs dfs -blah). These get appended to HADOOP_OPTS for +# such commands. In most cases, # this should be left empty and +# let users supply it on the command line. +# export HADOOP_CLIENT_OPTS="" + +# +# A note about classpaths. +# +# By default, Apache Hadoop overrides Java's CLASSPATH +# environment variable. It is configured such +# that it sarts out blank with new entries added after passing +# a series of checks (file/dir exists, not already listed aka +# de-deduplication). During de-depulication, wildcards and/or +# directories are *NOT* expanded to keep it simple. Therefore, +# if the computed classpath has two specific mentions of +# awesome-methods-1.0.jar, only the first one added will be seen. +# If two directories are in the classpath that both contain +# awesome-methods-1.0.jar, then Java will pick up both versions. + +# An additional, custom CLASSPATH. Site-wide configs should be +# handled via the shellprofile functionality, utilizing the +# hadoop_add_classpath function for greater control and much +# harder for apps/end-users to accidentally override. +# Similarly, end users should utilize ${HOME}/.hadooprc . +# This variable should ideally only be used as a short-cut, +# interactive way for temporary additions on the command line. +# export HADOOP_CLASSPATH="/some/cool/path/on/your/machine" + +# Should HADOOP_CLASSPATH be first in the official CLASSPATH? +# export HADOOP_USER_CLASSPATH_FIRST="yes" + +# If HADOOP_USE_CLIENT_CLASSLOADER is set, the classpath along +# with the main jar are handled by a separate isolated +# client classloader when 'hadoop jar', 'yarn jar', or 'mapred job' +# is utilized. If it is set, HADOOP_CLASSPATH and +# HADOOP_USER_CLASSPATH_FIRST are ignored. +# export HADOOP_USE_CLIENT_CLASSLOADER=true + +# HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES overrides the default definition of +# system classes for the client classloader when HADOOP_USE_CLIENT_CLASSLOADER +# is enabled. Names ending in '.' (period) are treated as package names, and +# names starting with a '-' are treated as negative matches. For example, +# export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop." + +# Enable optional, bundled Hadoop features +# This is a comma delimited list. It may NOT be overridden via .hadooprc +# Entries may be added/removed as needed. 
+# export HADOOP_OPTIONAL_TOOLS="hadoop-openstack,hadoop-aliyun,hadoop-azure,hadoop-azure-datalake,hadoop-aws,hadoop-kafka" + +### +# Options for remote shell connectivity +### + +# There are some optional components of hadoop that allow for +# command and control of remote hosts. For example, +# start-dfs.sh will attempt to bring up all NNs, DNS, etc. + +# Options to pass to SSH when one of the "log into a host and +# start/stop daemons" scripts is executed +# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s" + +# The built-in ssh handler will limit itself to 10 simultaneous connections. +# For pdsh users, this sets the fanout size ( -f ) +# Change this to increase/decrease as necessary. +# export HADOOP_SSH_PARALLEL=10 + +# Filename which contains all of the hosts for any remote execution +# helper scripts # such as workers.sh, start-dfs.sh, etc. +# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers" + +### +# Options for all daemons +### +# + +# +# Many options may also be specified as Java properties. It is +# very common, and in many cases, desirable, to hard-set these +# in daemon _OPTS variables. Where applicable, the appropriate +# Java property is also identified. Note that many are re-used +# or set differently in certain contexts (e.g., secure vs +# non-secure) +# + +# Where (primarily) daemon log files are stored. +# ${HADOOP_HOME}/logs by default. +# Java property: hadoop.log.dir +# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs + +# A string representing this instance of hadoop. $USER by default. +# This is used in writing log and pid files, so keep that in mind! +# Java property: hadoop.id.str +# export HADOOP_IDENT_STRING=$USER + +# How many seconds to pause after stopping a daemon +# export HADOOP_STOP_TIMEOUT=5 + +# Where pid files are stored. /tmp by default. +# export HADOOP_PID_DIR=/tmp + +# Default log4j setting for interactive commands +# Java property: hadoop.root.logger +# export HADOOP_ROOT_LOGGER=INFO,console + +# Default log4j setting for daemons spawned explicitly by +# --daemon option of hadoop, hdfs, mapred and yarn command. +# Java property: hadoop.root.logger +# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA + +# Default log level and output location for security-related messages. +# You will almost certainly want to change this on a per-daemon basis via +# the Java property (i.e., -Dhadoop.security.logger=foo). (Note that the +# defaults for the NN and 2NN override this by default.) +# Java property: hadoop.security.logger +# export HADOOP_SECURITY_LOGGER=INFO,NullAppender + +# Default process priority level +# Note that sub-processes will also run at this level! +# export HADOOP_NICENESS=0 + +# Default name for the service level authorization file +# Java property: hadoop.policy.file +# export HADOOP_POLICYFILE="hadoop-policy.xml" + +# +# NOTE: this is not used by default! <----- +# You can define variables right here and then re-use them later on. +# For example, it is common to use the same garbage collection settings +# for all the daemons. So one could define: +# +# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps" +# +# .. and then use it as per the b option under the namenode. + +### +# Secure/privileged execution +### + +# +# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons +# on privileged ports. This functionality can be replaced by providing +# custom functions. See hadoop-functions.sh for more information. 
+# + +# The jsvc implementation to use. Jsvc is required to run secure datanodes +# that bind to privileged ports to provide authentication of data transfer +# protocol. Jsvc is not required if SASL is configured for authentication of +# data transfer protocol using non-privileged ports. +# export JSVC_HOME=/usr/bin + +# +# This directory contains pids for secure and privileged processes. +#export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR} + +# +# This directory contains the logs for secure and privileged processes. +# Java property: hadoop.log.dir +# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR} + +# +# When running a secure daemon, the default value of HADOOP_IDENT_STRING +# ends up being a bit bogus. Therefore, by default, the code will +# replace HADOOP_IDENT_STRING with HADOOP_xx_SECURE_USER. If one wants +# to keep HADOOP_IDENT_STRING untouched, then uncomment this line. +# export HADOOP_SECURE_IDENT_PRESERVE="true" + +### +# NameNode specific parameters +### + +# Default log level and output location for file system related change +# messages. For non-namenode daemons, the Java property must be set in +# the appropriate _OPTS if one wants something other than INFO,NullAppender +# Java property: hdfs.audit.logger +# export HDFS_AUDIT_LOGGER=INFO,NullAppender + +# Specify the JVM options to be used when starting the NameNode. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# a) Set JMX options +# export HDFS_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026" +# +# b) Set garbage collection logs +# export HDFS_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" +# +# c) ... or set them directly +# export HDFS_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" + +# this is the default: +# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" + +### +# SecondaryNameNode specific parameters +### +# Specify the JVM options to be used when starting the SecondaryNameNode. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# This is the default: +# export HDFS_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" + +### +# DataNode specific parameters +### +# Specify the JVM options to be used when starting the DataNode. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# This is the default: +# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS" + +# On secure datanodes, user to run the datanode as after dropping privileges. +# This **MUST** be uncommented to enable secure HDFS if using privileged ports +# to provide authentication of data transfer protocol. This **MUST NOT** be +# defined if SASL is configured for authentication of data transfer protocol +# using non-privileged ports. +# This will replace the hadoop.id.str Java property in secure mode. +# export HDFS_DATANODE_SECURE_USER=hdfs + +# Supplemental options for secure datanodes +# By default, Hadoop uses jsvc which needs to know to launch a +# server jvm. 
+# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server" + +### +# NFS3 Gateway specific parameters +### +# Specify the JVM options to be used when starting the NFS3 Gateway. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_NFS3_OPTS="" + +# Specify the JVM options to be used when starting the Hadoop portmapper. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_PORTMAP_OPTS="-Xmx512m" + +# Supplemental options for priviliged gateways +# By default, Hadoop uses jsvc which needs to know to launch a +# server jvm. +# export HDFS_NFS3_SECURE_EXTRA_OPTS="-jvm server" + +# On privileged gateways, user to run the gateway as after dropping privileges +# This will replace the hadoop.id.str Java property in secure mode. +# export HDFS_NFS3_SECURE_USER=nfsserver + +### +# ZKFailoverController specific parameters +### +# Specify the JVM options to be used when starting the ZKFailoverController. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_ZKFC_OPTS="" + +### +# QuorumJournalNode specific parameters +### +# Specify the JVM options to be used when starting the QuorumJournalNode. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_JOURNALNODE_OPTS="" + +### +# HDFS Balancer specific parameters +### +# Specify the JVM options to be used when starting the HDFS Balancer. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_BALANCER_OPTS="" + +### +# HDFS Mover specific parameters +### +# Specify the JVM options to be used when starting the HDFS Mover. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_MOVER_OPTS="" + +### +# Router-based HDFS Federation specific parameters +# Specify the JVM options to be used when starting the RBF Routers. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_DFSROUTER_OPTS="" +### + +### +# Advanced Users Only! +### + +# +# When building Hadoop, one can add the class paths to the commands +# via this special env var: +# export HADOOP_ENABLE_BUILD_PATHS="true" + +# +# To prevent accidents, shell commands be (superficially) locked +# to only allow certain users to execute certain subcommands. +# It uses the format of (command)_(subcommand)_USER. 
+
+#
+# For example, to limit who can execute the namenode command,
+# export HDFS_NAMENODE_USER=hdfs
diff --git a/docker/test/integration/hive_server/hdfs-site.xml b/docker/test/integration/hive_server/hdfs-site.xml
new file mode 100644
index 00000000000..82c525ea414
--- /dev/null
+++ b/docker/test/integration/hive_server/hdfs-site.xml
@@ -0,0 +1,6 @@
+<configuration>
+    <property>
+        <name>dfs.replication</name>
+        <value>1</value>
+    </property>
+</configuration>
diff --git a/docker/test/integration/hive_server/hive-site.xml b/docker/test/integration/hive_server/hive-site.xml
new file mode 100644
index 00000000000..ec1735ea16f
--- /dev/null
+++ b/docker/test/integration/hive_server/hive-site.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>javax.jdo.option.ConnectionURL</name>
+        <value>jdbc:mysql://localhost/hcatalog?createDatabaseIfNotExist=true</value>
+    </property>
+    <property>
+        <name>javax.jdo.option.ConnectionUserName</name>
+        <value>test</value>
+    </property>
+    <property>
+        <name>javax.jdo.option.ConnectionPassword</name>
+        <value>test</value>
+    </property>
+    <property>
+        <name>javax.jdo.option.ConnectionDriverName</name>
+        <value>com.mysql.jdbc.Driver</value>
+    </property>
+</configuration>
diff --git a/docker/test/integration/hive_server/mapred-site.xml b/docker/test/integration/hive_server/mapred-site.xml
new file mode 100644
index 00000000000..dba582f1c31
--- /dev/null
+++ b/docker/test/integration/hive_server/mapred-site.xml
@@ -0,0 +1,6 @@
+<configuration>
+    <property>
+        <name>mapreduce.framework.name</name>
+        <value>yarn</value>
+    </property>
+</configuration>
diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh
new file mode 100755
index 00000000000..afecbb91c5d
--- /dev/null
+++ b/docker/test/integration/hive_server/prepare_hive_data.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+hive -e "create database test"
+
+hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
+hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
+ hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
diff --git a/docker/test/integration/hive_server/start.sh b/docker/test/integration/hive_server/start.sh
new file mode 100755
index 00000000000..e01f28542af
--- /dev/null
+++ b/docker/test/integration/hive_server/start.sh
@@ -0,0 +1,12 @@
+service ssh start
+sed "s/HOSTNAME/$HOSTNAME/" /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml
+hadoop namenode -format
+start-all.sh
+service mysql start
+mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\""
+mysql -u root -e "GRANT ALL ON *.* TO 'test'@'localhost'"
+schematool -initSchema -dbType mysql
+#nohup hiveserver2 &
+nohup hive --service metastore &
+bash /prepare_hive_data.sh
+while true; do sleep 1000; done
diff --git a/docker/test/integration/hive_server/yarn-site.xml b/docker/test/integration/hive_server/yarn-site.xml
new file mode 100644
index 00000000000..bd5f694e6a6
--- /dev/null
+++ b/docker/test/integration/hive_server/yarn-site.xml
@@ -0,0 +1,32 @@
+<configuration>
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value>mapreduce_shuffle</value>
+    </property>
+
+    <property>
+        <name>yarn.application.classpath</name>
+        <value>/hadoop-3.1.0/etc/hadoop,/hadoop-3.1.0/share/hadoop/common/*,/hadoop-3.1.0/share/hadoop/common/lib/*,/hadoop-3.1.0/share/hadoop/hdfs/*, /hadoop-3.1.0/share/hadoop/hdfs/lib/*, /hadoop-3.1.0/share/hadoop/mapreduce/*, /hadoop-3.1.0/share/hadoop/mapreduce/lib/*, /hadoop-3.1.0/share/hadoop/yarn/*, /hadoop-3.1.0/share/hadoop/yarn/lib/*</value>
+    </property>
+
+    <property>
+        <description>
+        Number of seconds after an application finishes before the nodemanager's
+        DeletionService will delete the application's localized file directory
+        and log directory.
+
+        To diagnose Yarn application problems, set this property's value large
+        enough (for example, to 600 = 10 minutes) to permit examination of these
+        directories. After changing the property's value, you must restart the
+        nodemanager in order for it to have an effect.
+
+        The roots of Yarn applications' work directories is configurable with
+        the yarn.nodemanager.local-dirs property (see below), and the roots
+        of the Yarn applications' log directories is configurable with the
+        yarn.nodemanager.log-dirs property (see also below).
+        </description>
+        <name>yarn.nodemanager.delete.debug-delay-sec</name>
+        <value>600</value>
+    </property>
+</configuration>
diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index 6a40fea7500..1aad2ae6770 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -45,7 +45,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 ENV DOCKER_CHANNEL stable
 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
-RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
+RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
 
 RUN apt-get update \
     && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
@@ -58,7 +58,9 @@ RUN apt-get update \
 
 RUN dockerd --version; docker --version
 
-RUN python3 -m pip install \
+ARG TARGETARCH
+# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
+RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
     PyMySQL \
     aerospike==4.0.0 \
     avro==1.10.2 \
@@ -88,7 +90,7 @@ RUN python3 -m pip install \
     urllib3 \
     requests-kerberos \
     pyhdfs \
-    azure-storage-blob
+    azure-storage-blob )
 
 COPY modprobe.sh /usr/local/bin/modprobe
 COPY dockerd-entrypoint.sh /usr/local/bin/
@@ -102,8 +104,6 @@ RUN set -x \
   && echo 'dockremap:165536:65536' >> /etc/subuid \
   && echo 'dockremap:165536:65536' >> /etc/subgid
 
-RUN echo '127.0.0.1 localhost test.com' >> /etc/hosts
-
 EXPOSE 2375
 ENTRYPOINT ["dockerd-entrypoint.sh"]
 CMD ["sh", "-c", "pytest $PYTEST_OPTS"]
diff --git a/docker/test/integration/runner/compose/docker_compose_hive.yml b/docker/test/integration/runner/compose/docker_compose_hive.yml
new file mode 100644
index 00000000000..44f23655d2a
--- /dev/null
+++ b/docker/test/integration/runner/compose/docker_compose_hive.yml
@@ -0,0 +1,7 @@
+version: '2.3'
+services:
+    hdfs1:
+        image: lgboustc/hive_test:v1.0
+        hostname:
hivetest + restart: always + entrypoint: bash /start.sh diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index 0bdd054420a..80190b0a293 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -14,4 +14,4 @@ services: image: mongo:5.0 restart: always ports: - - "27018:27017" + - ${MONGO_NO_CRED_EXTERNAL_PORT}:${MONGO_NO_CRED_INTERNAL_PORT} diff --git a/docker/test/keeper-jepsen/Dockerfile b/docker/test/keeper-jepsen/Dockerfile index 5bb7f9433c2..a794e076ec0 100644 --- a/docker/test/keeper-jepsen/Dockerfile +++ b/docker/test/keeper-jepsen/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/keeper-jepsen-test . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG ENV DEBIAN_FRONTEND=noninteractive ENV CLOJURE_VERSION=1.10.3.814 diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index c236b3a51d1..f484feecfd0 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -1,8 +1,14 @@ +# rebuild in #33610 # docker build -t clickhouse/pvs-test . -FROM clickhouse/binary-builder +ARG FROM_TAG=latest +FROM clickhouse/binary-builder:$FROM_TAG -RUN apt-get update --yes \ +# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere +# We'll produce an empty image for arm64 +ARG TARGETARCH + +RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \ && apt-get install \ bash \ wget \ @@ -15,7 +21,7 @@ RUN apt-get update --yes \ libprotoc-dev \ libgrpc++-dev \ libc-ares-dev \ - --yes --no-install-recommends + --yes --no-install-recommends ) #RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add - #RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list @@ -27,7 +33,7 @@ RUN apt-get update --yes \ ENV PKG_VERSION="pvs-studio-latest" -RUN set -x \ +RUN test x$TARGETARCH = xarm64 || ( set -x \ && export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \ && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \ && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \ @@ -35,7 +41,7 @@ RUN set -x \ && wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \ && { debsig-verify ${PKG_VERSION}.deb \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ - && dpkg -i "${PKG_VERSION}.deb" + && dpkg -i "${PKG_VERSION}.deb" ) ENV CCACHE_DIR=/test_output/ccache diff --git a/docker/test/split_build_smoke_test/Dockerfile b/docker/test/split_build_smoke_test/Dockerfile index 3cc2f26a507..5f84eb42216 100644 --- a/docker/test/split_build_smoke_test/Dockerfile +++ b/docker/test/split_build_smoke_test/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/split-build-smoke-test . -FROM clickhouse/binary-builder +ARG FROM_TAG=latest +FROM clickhouse/binary-builder:$FROM_TAG COPY run.sh /run.sh COPY process_split_build_smoke_test_result.py / diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index b6e9fad600c..7c16e69a99b 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/stateful-test . 
-FROM clickhouse/stateless-test +ARG FROM_TAG=latest +FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 05d26924b15..9b7fde7d542 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -1,11 +1,10 @@ +# rebuild in #33610 # docker build -t clickhouse/stateless-test . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" -RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5 - RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ @@ -30,7 +29,7 @@ RUN apt-get update -y \ tree \ unixodbc \ wget \ - mysql-client=5.7* \ + mysql-client=8.0* \ postgresql-client \ sqlite3 @@ -49,10 +48,13 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 ENV MAX_RUN_TIME=0 +ARG TARGETARCH + # Download Minio-related binaries -RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \ +RUN arch=${TARGETARCH:-amd64} \ + && wget "https://dl.min.io/server/minio/release/linux-${arch}/minio" \ && chmod +x ./minio \ - && wget 'https://dl.min.io/client/mc/release/linux-amd64/mc' \ + && wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \ && chmod +x ./mc ENV MINIO_ROOT_USER="clickhouse" diff --git a/docker/test/stateless_pytest/Dockerfile b/docker/test/stateless_pytest/Dockerfile index c1e47523f6d..f692f8f39f0 100644 --- a/docker/test/stateless_pytest/Dockerfile +++ b/docker/test/stateless_pytest/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/stateless-pytest . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index e28d25c9485..4e0b6741061 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/stress-test . 
-FROM clickhouse/stateful-test
+ARG FROM_TAG=latest
+FROM clickhouse/stateful-test:$FROM_TAG
 
 RUN apt-get update -y \
     && env DEBIAN_FRONTEND=noninteractive \
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 2efb62689ff..4387d16ea7c 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -146,6 +146,7 @@ handle SIGUSR2 nostop noprint pass
 handle SIG$RTMIN nostop noprint pass
 info signals
 continue
+gcore
 backtrace full
 info locals
 info registers
@@ -263,3 +264,10 @@ done
 # Write check result into check_status.tsv
 clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
 [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
+
+# Core dumps (see gcore)
+# Default filename is 'core.PROCESS_ID'
+for core in core.*; do
+    pigz "$core"
+    mv "$core.gz" /output/
+done
diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index 824af0de022..a68b52170e0 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -1,19 +1,41 @@
 # docker build -t clickhouse/style-test .
 FROM ubuntu:20.04
+ARG ACT_VERSION=0.2.25
+ARG ACTIONLINT_VERSION=1.6.8
 
 # ARG for quick switch to a given ubuntu mirror
 ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
 
 RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
-    shellcheck \
-    libxml2-utils \
+    curl \
     git \
-    python3-pip \
+    libxml2-utils \
     pylint \
+    python3-pip \
+    shellcheck \
     yamllint \
     && pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
 
+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+
+# Get act and actionlint from releases
+RUN arch=${TARGETARCH:-amd64} \
+    && case $arch in \
+        amd64) act_arch=x86_64 ;; \
+        arm64) act_arch=$arch ;; \
+    esac \
+    && curl -o /tmp/act.tgz -L \
+        "https://github.com/nektos/act/releases/download/v${ACT_VERSION}/act_Linux_${act_arch}.tar.gz" \
+    && tar xf /tmp/act.tgz -C /usr/bin act \
+    && rm /tmp/act.tgz \
+    && curl -o /tmp/actionlint.tar.gz -L \
+        "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_${arch}.tar.gz" \
+    && tar xf /tmp/actionlint.tar.gz -C /usr/bin actionlint \
+    && rm /tmp/actionlint.tar.gz
+
+
 COPY run.sh /
 COPY process_style_check_result.py /
 CMD ["/bin/bash", "/run.sh"]
diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py
index 61b1e0f05c5..b7e00c49e06 100755
--- a/docker/test/style/process_style_check_result.py
+++ b/docker/test/style/process_style_check_result.py
@@ -11,40 +11,7 @@ def process_result(result_folder):
     description = ""
     test_results = []
 
-    style_log_path = '{}/style_output.txt'.format(result_folder)
-    if not os.path.exists(style_log_path):
-        logging.info("No style check log on path %s", style_log_path)
-        return "exception", "No style check log", []
-    elif os.stat(style_log_path).st_size != 0:
-        description += "Style check failed. "
-        test_results.append(("Style check", "FAIL"))
-        status = "failure" # Disabled for now
-    else:
-        test_results.append(("Style check", "OK"))
-
-    typos_log_path = '{}/typos_output.txt'.format(result_folder)
-    if not os.path.exists(style_log_path):
-        logging.info("No typos check log on path %s", style_log_path)
-        return "exception", "No typos check log", []
-    elif os.stat(typos_log_path).st_size != 0:
-        description += "Typos check failed. "
-        test_results.append(("Typos check", "FAIL"))
-        status = "failure"
-    else:
-        test_results.append(("Typos check", "OK"))
-
-    whitespaces_log_path = '{}/whitespaces_output.txt'.format(result_folder)
-    if not os.path.exists(style_log_path):
-        logging.info("No whitespaces check log on path %s", style_log_path)
-        return "exception", "No whitespaces check log", []
-    elif os.stat(whitespaces_log_path).st_size != 0:
-        description += "Whitespaces check failed. "
-        test_results.append(("Whitespaces check", "FAIL"))
-        status = "failure"
-    else:
-        test_results.append(("Whitespaces check", "OK"))
-
-    duplicate_log_path = '{}/duplicate_output.txt'.format(result_folder)
+    duplicate_log_path = "{}/duplicate_output.txt".format(result_folder)
     if not os.path.exists(duplicate_log_path):
         logging.info("No header duplicates check log on path %s", duplicate_log_path)
         return "exception", "No header duplicates check log", []
@@ -55,7 +22,7 @@ def process_result(result_folder):
     else:
         test_results.append(("Header duplicates check", "OK"))
 
-    shellcheck_log_path = '{}/shellcheck_output.txt'.format(result_folder)
+    shellcheck_log_path = "{}/shellcheck_output.txt".format(result_folder)
     if not os.path.exists(shellcheck_log_path):
         logging.info("No shellcheck log on path %s", shellcheck_log_path)
         return "exception", "No shellcheck log", []
@@ -66,6 +33,50 @@ def process_result(result_folder):
     else:
         test_results.append(("Shellcheck", "OK"))
 
+    style_log_path = "{}/style_output.txt".format(result_folder)
+    if not os.path.exists(style_log_path):
+        logging.info("No style check log on path %s", style_log_path)
+        return "exception", "No style check log", []
+    elif os.stat(style_log_path).st_size != 0:
+        description += "Style check failed. "
+        test_results.append(("Style check", "FAIL"))
+        status = "failure"
+    else:
+        test_results.append(("Style check", "OK"))
+
+    typos_log_path = "{}/typos_output.txt".format(result_folder)
+    if not os.path.exists(typos_log_path):
+        logging.info("No typos check log on path %s", typos_log_path)
+        return "exception", "No typos check log", []
+    elif os.stat(typos_log_path).st_size != 0:
+        description += "Typos check failed. "
+        test_results.append(("Typos check", "FAIL"))
+        status = "failure"
+    else:
+        test_results.append(("Typos check", "OK"))
+
+    whitespaces_log_path = "{}/whitespaces_output.txt".format(result_folder)
+    if not os.path.exists(whitespaces_log_path):
+        logging.info("No whitespaces check log on path %s", whitespaces_log_path)
+        return "exception", "No whitespaces check log", []
+    elif os.stat(whitespaces_log_path).st_size != 0:
+        description += "Whitespaces check failed. "
+        test_results.append(("Whitespaces check", "FAIL"))
+        status = "failure"
+    else:
+        test_results.append(("Whitespaces check", "OK"))
+
+    workflows_log_path = "{}/workflows_output.txt".format(result_folder)
+    if not os.path.exists(workflows_log_path):
+        logging.info("No workflows check log on path %s", workflows_log_path)
+        return "exception", "No workflows check log", []
+    elif os.stat(workflows_log_path).st_size != 0:
+        description += "Workflows check failed. 
" + test_results.append(("Workflows check", "FAIL")) + status = "failure" + else: + test_results.append(("Workflows check", "OK")) + if not description: description += "Style check success" @@ -73,20 +84,22 @@ def process_result(result_folder): def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of style check") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of style check" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results = process_result(args.in_results_dir) diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 0118e6df764..98bc0053ab9 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -3,9 +3,10 @@ # yaml check is not the best one cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv +./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt ./check-style -n |& tee /test_output/style_output.txt ./check-typos |& tee /test_output/typos_output.txt ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt -./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt +./check-workflows |& tee /test_output/workflows_output.txt ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/unit/Dockerfile b/docker/test/unit/Dockerfile index 20d67773363..b75bfb6661c 100644 --- a/docker/test/unit/Dockerfile +++ b/docker/test/unit/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/unit-test . -FROM clickhouse/stateless-test +ARG FROM_TAG=latest +FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get install gdb diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index d9894451528..d9827260acb 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -1,3 +1,4 @@ +# rebuild in #33610 # docker build -t clickhouse/test-util . FROM ubuntu:20.04 diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md index db9138fbbb7..f950cdcc6db 100644 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ b/docs/_includes/cmake_in_clickhouse_header.md @@ -6,8 +6,8 @@ Minimal ClickHouse build example: ```bash cmake .. 
\
-    -DCMAKE_C_COMPILER=$(which clang-11) \
-    -DCMAKE_CXX_COMPILER=$(which clang++-11) \
+    -DCMAKE_C_COMPILER=$(which clang-13) \
+    -DCMAKE_CXX_COMPILER=$(which clang++-13) \
     -DCMAKE_BUILD_TYPE=Debug \
     -DENABLE_CLICKHOUSE_ALL=OFF \
     -DENABLE_CLICKHOUSE_SERVER=ON \
diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md
index 44e68d645b7..b696c441374 100644
--- a/docs/en/development/architecture.md
+++ b/docs/en/development/architecture.md
@@ -175,7 +175,7 @@ When we are going to read something from a part in `MergeTree`, we look at `prim
 
 When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. That’s why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts.
 
-`MergeTree` is not an LSM tree because it does not contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications.
+`MergeTree` is not an LSM tree because it does not contain a memtable or a log: inserted data is written directly to the filesystem. This makes `MergeTree` much better suited to inserting data in batches than to frequent small inserts: a couple of inserts per second is fine, but a thousand small inserts per second is not. However, there is an asynchronous insert mode for small inserts that overcomes this limitation. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications.
 
 There are MergeTree engines that are doing additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as special support for updates. Keep in mind that these are not real updates because users usually have no control over the time when background merges are executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form.
diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md
index a110e63801c..19d157bcd7d 100644
--- a/docs/en/development/build-osx.md
+++ b/docs/en/development/build-osx.md
@@ -5,7 +5,7 @@ toc_title: Build on Mac OS X
 
 # How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
 
-!!! info "You don't have to build ClickHouse yourself!"
+!!! info "You don't have to build ClickHouse yourself"
     You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start). Follow `macOS (Intel)` or `macOS (Apple silicon)` installation instructions.
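A note on the asynchronous insert mode referenced in the architecture.md change above. A minimal sketch of how a small insert can opt into it, assuming a hypothetical table `t` with a single `UInt64` column (`async_insert` and `wait_for_async_insert` are the server settings involved):

``` sql
-- Small inserts are buffered on the server and flushed as one batch;
-- wait_for_async_insert = 1 makes the client block until the buffer is flushed.
INSERT INTO t SETTINGS async_insert = 1, wait_for_async_insert = 1 VALUES (42);
```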
diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md
index 43f61201946..56793435fac 100644
--- a/docs/en/engines/database-engines/materialized-postgresql.md
+++ b/docs/en/engines/database-engines/materialized-postgresql.md
@@ -26,7 +26,7 @@ ENGINE = MaterializedPostgreSQL('host:port', 'database', 'user', 'password') [SE
 ## Example of Use {#example-of-use}
 
 ``` sql
-CREATE DATABASE postgresql;
+CREATE DATABASE postgres_db
 ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
 
 SHOW TABLES FROM postgres_db;
diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md
new file mode 100644
index 00000000000..b804b9c2279
--- /dev/null
+++ b/docs/en/engines/table-engines/integrations/hive.md
@@ -0,0 +1,408 @@
+---
+toc_priority: 4
+toc_title: Hive
+---
+
+# Hive {#hive}
+
+The Hive engine allows you to perform `SELECT` queries on HDFS Hive tables. Currently it supports the following input formats:
+
+- Text: only supports simple scalar column types except `binary`
+
+- ORC: supports simple scalar column types except `char`; among complex types, only `array` is supported
+
+- Parquet: supports all simple scalar column types; among complex types, only `array` is supported
+
+## Creating a Table {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [ALIAS expr1],
+    name2 [type2] [ALIAS expr2],
+    ...
+) ENGINE = Hive('thrift://host:port', 'database', 'table')
+PARTITION BY expr
+```
+See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
+
+The table structure can differ from the original Hive table structure:
+- Column names should be the same as in the original Hive table, but you can use just some of these columns, in any order; you can also use some alias columns calculated from other columns.
+- Column types should be the same as those in the original Hive table.
+- The `PARTITION BY` expression should be consistent with that of the original Hive table, and its columns should be part of the table structure.
+
+**Engine Parameters**
+
+- `thrift://host:port` — Hive Metastore address.
+
+- `database` — Remote database name.
+
+- `table` — Remote table name.
+
+## Usage Example {#usage-example}
+
+### How to Use Local Cache for HDFS Filesystem
+We strongly advise you to enable a local cache for remote filesystems. Benchmarks show that reads are almost 2x faster with the cache.
+
+Before using the cache, add it to `config.xml`:
+``` xml
+<local_cache_for_remote_fs>
+    <enable>true</enable>
+    <root_dir>local_cache</root_dir>
+    <limit_size>559096952</limit_size>
+    <bytes_read_before_flush>1048576</bytes_read_before_flush>
+</local_cache_for_remote_fs>
+```
+
+- enable: ClickHouse will maintain a local cache for the remote filesystem (HDFS) after startup if true.
+- root_dir: Required. The root directory to store local cache files for the remote filesystem.
+- limit_size: Required. The maximum size (in bytes) of local cache files.
+- bytes_read_before_flush: How many bytes to read from the remote filesystem before flushing to the local cache file. The default value is 1MB.
+
+When ClickHouse is started up with the local cache for remote filesystems enabled, users can still choose not to use the cache with `SETTINGS use_local_cache_for_remote_fs = 0` in their queries. `use_local_cache_for_remote_fs` is `false` by default.
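+
+For instance, a minimal sketch of the per-query opt-out (it reuses the `test.test_orc` table that is created in the next section):
+
+``` sql
+-- Bypass the local cache for this query only; data is read from remote HDFS directly.
+SELECT count() FROM test.test_orc SETTINGS use_local_cache_for_remote_fs = 0;
+```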
+
+### Query Hive Table with ORC Input Format
+
+#### Create Table in Hive
+``` text
+hive > CREATE TABLE `test`.`test_orc`(
+  `f_tinyint` tinyint,
+  `f_smallint` smallint,
+  `f_int` int,
+  `f_integer` int,
+  `f_bigint` bigint,
+  `f_float` float,
+  `f_double` double,
+  `f_decimal` decimal(10,0),
+  `f_timestamp` timestamp,
+  `f_date` date,
+  `f_string` string,
+  `f_varchar` varchar(100),
+  `f_bool` boolean,
+  `f_binary` binary,
+  `f_array_int` array<int>,
+  `f_array_string` array<string>,
+  `f_array_float` array<float>,
+  `f_array_array_int` array<array<int>>,
+  `f_array_array_string` array<array<string>>,
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_orc'
+
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_orc;
+OK
+1	2	3	4	5	6.11	7.22	8	2021-11-05 12:38:16.314	2021-11-05	hello world	hello world	hello world	true	hello world	[1,2,3]	["hello world","hello world"]	[1.1,1.2]	[[1,2],[3,4]]	[["a","b"],["c","d"]]	[[1.11,2.22],[3.33,4.44]]	2021-09-18
+Time taken: 0.295 seconds, Fetched: 1 row(s)
+```
+
+#### Create Table in ClickHouse
+Table in ClickHouse, retrieving data from the Hive table created above:
+``` sql
+CREATE TABLE test.test_orc
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_bool` Bool,
+    `f_binary` String,
+    `f_array_int` Array(Int32),
+    `f_array_string` Array(String),
+    `f_array_float` Array(Float32),
+    `f_array_array_int` Array(Array(Int32)),
+    `f_array_array_string` Array(Array(String)),
+    `f_array_array_float` Array(Array(Float32)),
+    `day` String
+)
+ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+PARTITION BY day
+
+```
+
+``` sql
+SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test.test_orc
+SETTINGS input_format_orc_allow_missing_columns = 1
+
+Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
+
+Row 1:
+──────
+f_tinyint:            1
+f_smallint:           2
+f_int:                3
+f_integer:            4
+f_bigint:             5
+f_float:              6.11
+f_double:             7.22
+f_decimal:            8
+f_timestamp:          2021-12-04 04:00:44
+f_date:               2021-12-03
+f_string:             hello world
+f_varchar:            hello world
+f_bool:               true
+f_binary:             hello world
+f_array_int:          [1,2,3]
+f_array_string:       ['hello world','hello world']
+f_array_float:        [1.1,1.2]
+f_array_array_int:    [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float:  [[1.11,2.22],[3.33,4.44]]
+day:                  2021-09-18
+
+
+1 rows in set. Elapsed: 0.078 sec.
+```
+
+### Query Hive Table with Parquet Input Format
+
+#### Create Table in Hive
+``` text
+hive >
+CREATE TABLE `test`.`test_parquet`(
+  `f_tinyint` tinyint,
+  `f_smallint` smallint,
+  `f_int` int,
+  `f_integer` int,
+  `f_bigint` bigint,
+  `f_float` float,
+  `f_double` double,
+  `f_decimal` decimal(10,0),
+  `f_timestamp` timestamp,
+  `f_date` date,
+  `f_string` string,
+  `f_varchar` varchar(100),
+  `f_char` char(100),
+  `f_bool` boolean,
+  `f_binary` binary,
+  `f_array_int` array<int>,
+  `f_array_string` array<string>,
+  `f_array_float` array<float>,
+  `f_array_array_int` array<array<int>>,
+  `f_array_array_string` array<array<string>>,
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_parquet'
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_parquet;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-12-14 17:54:56.743 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.766 seconds, Fetched: 1 row(s)
+```
+
+#### Create Table in ClickHouse
+Table in ClickHouse, retrieving data from the Hive table created above:
+``` sql
+CREATE TABLE test.test_parquet
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_char` String,
+    `f_bool` Bool,
+    `f_binary` String,
+    `f_array_int` Array(Int32),
+    `f_array_string` Array(String),
+    `f_array_float` Array(Float32),
+    `f_array_array_int` Array(Array(Int32)),
+    `f_array_array_string` Array(Array(String)),
+    `f_array_array_float` Array(Array(Float32)),
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_parquet SETTINGS input_format_parquet_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test_parquet
+SETTINGS input_format_parquet_allow_missing_columns = 1
+
+Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-14 17:54:56
+f_date: 2021-12-14
+f_string: hello world
+f_varchar: hello world
+f_char: hello world
+f_bool: true
+f_binary: hello world
+f_array_int: [1,2,3]
+f_array_string: ['hello world','hello world']
+f_array_float: [1.1,1.2]
+f_array_array_int: [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float: [[1.11,2.22],[3.33,4.44]]
+day: 2021-09-18
+
+1 rows in set. Elapsed: 0.357 sec.
+```
+
+### Query Hive Table with Text Input Format
+
+#### Create Table in Hive
+``` text
+hive >
+CREATE TABLE `test`.`test_text`(
+  `f_tinyint` tinyint,
+  `f_smallint` smallint,
+  `f_int` int,
+  `f_integer` int,
+  `f_bigint` bigint,
+  `f_float` float,
+  `f_double` double,
+  `f_decimal` decimal(10,0),
+  `f_timestamp` timestamp,
+  `f_date` date,
+  `f_string` string,
+  `f_varchar` varchar(100),
+  `f_char` char(100),
+  `f_bool` boolean,
+  `f_binary` binary,
+  `f_array_int` array<int>,
+  `f_array_string` array<string>,
+  `f_array_float` array<float>,
+  `f_array_array_int` array<array<int>>,
+  `f_array_array_string` array<array<string>>,
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_text'
+Time taken: 0.1 seconds, Fetched: 34 row(s)
+
+hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_text;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.624 seconds, Fetched: 1 row(s)
+```
+
+#### Create Table in ClickHouse
+
+Table in ClickHouse, retrieving data from the Hive table created above:
+``` sql
+CREATE TABLE test.test_text
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_char` String,
+    `f_bool` Bool,
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_text SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
+```
+
+``` text
+SELECT *
+FROM test.test_text
+SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
+
+Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-14 18:11:17
+f_date: 2021-12-14
+f_string: hello world
+f_varchar: hello world
+f_char: hello world
+f_bool: true
+day: 2021-09-18
+```
diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md
index 743d25ad616..a06b4c78394 100644
--- a/docs/en/engines/table-engines/integrations/index.md
+++ b/docs/en/engines/table-engines/integrations/index.md
@@ -20,3 +20,4 @@ List of supported integrations:
 - [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
 - [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
 - [SQLite](../../../engines/table-engines/integrations/sqlite.md)
+- [Hive](../../../engines/table-engines/integrations/hive.md)
diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 52876674475..475416ffb94 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -66,4 +66,14 @@ SELECT COUNT() FROM mongo_table;
 └─────────┘
 ```
 
+You can also adjust the connection timeout:
+
+``` sql
+CREATE TABLE mongo_table
+(
+    key UInt64,
+    data String
+) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'connectTimeoutMS=100000');
+```
+
 [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mongodb/)
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index c2660653907..b67e373be35 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -153,6 +153,7 @@ toc_title: Adopters
 | Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
 | Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
 | Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
+| Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) |
 | Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
 | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
 | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
@@ -178,7 +179,7 @@ toc_title: Adopters
 | Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
 | Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
 | Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
-| Yandex Metrica | Web analytics | Macin product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
+| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
 | Yellowfin | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
 | Yotascale | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
 | Your Analytics | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |
diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md
index 350ca835187..fcfc675f9d7 100644
--- a/docs/en/operations/clickhouse-keeper.md
+++ b/docs/en/operations/clickhouse-keeper.md
@@ -36,7 +36,8 @@ Other common parameters are inherited from the ClickHouse server config (`listen
 
 Internal coordination settings are located in `<keeper_server>.<coordination_settings>` section:
 
 - `operation_timeout_ms` — Timeout for a single client operation (ms) (default: 10000).
-- `session_timeout_ms` — Timeout for client session (ms) (default: 30000).
+- `min_session_timeout_ms` — Min timeout for client session (ms) (default: 10000).
+- `session_timeout_ms` — Max timeout for client session (ms) (default: 100000).
 - `dead_session_check_period_ms` — How often ClickHouse Keeper check dead sessions and remove them (ms) (default: 500).
 - `heart_beat_interval_ms` — How often a ClickHouse Keeper leader will send heartbeats to followers (ms) (default: 500).
 - `election_timeout_lower_bound_ms` — If the follower didn't receive heartbeats from the leader in this interval, then it can initiate leader election (default: 1000).
diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md
index da84c1f6a89..f326762a610 100644
--- a/docs/en/operations/external-authenticators/kerberos.md
+++ b/docs/en/operations/external-authenticators/kerberos.md
@@ -51,9 +51,6 @@ With filtering by realm:
 ```
 
-!!! warning "Note"
-    You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.
-
 !!! warning "Note"
     `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication.
diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md
index 2efe980a4cf..fcd78c4452f 100644
--- a/docs/en/sql-reference/functions/geo/h3.md
+++ b/docs/en/sql-reference/functions/geo/h3.md
@@ -740,74 +740,6 @@ Result:
 └───────┘
 ```
 
-## h3DegsToRads {#h3degstorads}
-
-Converts degrees to radians.
-
-**Syntax**
-
-``` sql
-h3DegsToRads(degrees)
-```
-
-**Parameter**
-
-- `degrees` — Input in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
-
-**Returned values**
-
-- Radians. Type: [Float64](../../../sql-reference/data-types/float.md).
-
-**Example**
-
-Query:
-
-``` sql
-SELECT h3DegsToRads(180.0) AS radians;
-```
-
-Result:
-
-``` text
-┌───────────radians─┐
-│ 3.141592653589793 │
-└───────────────────┘
-```
-
-## h3RadsToDegs {#h3radstodegs}
-
-Converts radians to degrees.
-
-**Syntax**
-
-``` sql
-h3RadsToDegs(radians)
-```
-
-**Parameter**
-
-- `radians` — Input in radians. Type: [Float64](../../../sql-reference/data-types/float.md).
-
-**Returned values**
-
-- Degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
-
-**Example**
-
-Query:
-
-``` sql
-SELECT h3RadsToDegs(3.141592653589793) AS degrees;
-```
-
-Result:
-
-``` text
-┌─degrees─┐
-│     180 │
-└─────────┘
-```
-
 ## h3CellAreaM2 {#h3cellaream2}
 
 Returns the exact area of a specific cell in square meters corresponding to the given input H3 index.
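For reference, the new Keeper session-timeout settings from the clickhouse-keeper.md hunk above would sit in the configuration roughly like this minimal sketch (setting names and default values are taken from that hunk; the surrounding layout is an assumption):

``` xml
<keeper_server>
    <coordination_settings>
        <operation_timeout_ms>10000</operation_timeout_ms>
        <!-- min and max timeout for a client session (ms), per the settings list above -->
        <min_session_timeout_ms>10000</min_session_timeout_ms>
        <session_timeout_ms>100000</session_timeout_ms>
    </coordination_settings>
</keeper_server>
```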
diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index c36f1b1c9cf..f8eefad7051 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -55,7 +55,11 @@ SELECT * FROM insert_select_testtable;
 └───┴───┴───┘
 ```
 
-In this example, we see that the second inserted row has `a` and `c` columns filled by the passed values, and `b` filled with value by default.
+In this example, we see that the second inserted row has `a` and `c` columns filled by the passed values, and `b` filled with the default value. It is also possible to use the `DEFAULT` keyword to insert default values:
+
+``` sql
+INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1);
+```
 
 If a list of columns does not include all existing columns, the rest of the columns are filled with:
diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md
index 4054f373cc1..30899cc2940 100644
--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@@ -16,7 +16,7 @@ This query tries to initialize an unscheduled merge of data parts for tables.
 OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
 ```
 
-The `OPTMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.
+The `OPTIMIZE` query is supported for the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.
 
 When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `2`) or on current replica (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `1`).
diff --git a/docs/en/whats-new/changelog/2021.md b/docs/en/whats-new/changelog/2021.md
new file mode 100644
index 00000000000..1c3ceca08a7
--- /dev/null
+++ b/docs/en/whats-new/changelog/2021.md
@@ -0,0 +1,2050 @@
+---
+toc_priority: 75
+toc_title: '2021'
+---
+
+### ClickHouse release v21.12, 2021-12-15
+
+#### Backward Incompatible Change
+
+* *A fix for a feature that previously had unwanted behaviour.* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. Direct select will not be allowed, even if enabled by the setting, in case there is an attached materialized view. For Kafka and RabbitMQ, direct select, if allowed, will not commit messages by default. To enable commits with direct select, the user must use the storage-level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* *A slight change in behaviour of a new function.* Return unquoted string in JSON_VALUE.
Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* *Setting rename.* Add custom null representation support for TSV/CSV input formats. Fix deserializing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)).
+* *Further deprecation of already unused code.* This is relevant only for users of ClickHouse versions older than 20.6. A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then the server will fail to start after the upgrade. Stop replicas with the old version to make the new version start. After that it will not be possible to downgrade to a version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)).
+
+#### New Feature
+
+* Implemented more of the ZooKeeper Four Letter Words commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). Now `clickhouse-keeper` is feature complete.
+* Support for `Bool` data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)).
+* Support for `PARTITION BY` in File, URL, HDFS storages and with `INSERT INTO` table function. Closes [#30273](https://github.com/ClickHouse/ClickHouse/issues/30273). [#30690](https://github.com/ClickHouse/ClickHouse/pull/30690) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added `CONSTRAINT ... ASSUME ...` (without checking during `INSERT`). Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization. Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... later). Added ability to replace heavy columns with light columns if it's possible. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)).
+* Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)).
+* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)).
+* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)).
+* *TLDR: Major improvements of completeness and consistency of text formats.* Refactor formats `TSV`, `TSVRaw`, `CSV` and `JSONCompactEachRow`, `JSONCompactStringsEachRow`, remove code duplication, add base interface for formats with `-WithNames` and `-WithNamesAndTypes` suffixes. Add formats `CSVWithNamesAndTypes`, `TSVRawWithNames`, `TSVRawWithNamesAndTypes`, `JSONCompactEachRowWithNames`, `JSONCompactStringsEachRowWithNames`, `RowBinaryWithNames`. Support parallel parsing for formats `TSVWithNamesAndTypes`, `TSVRaw(WithNames/WithNamesAndTypes)`, `CSVWithNamesAndTypes`, `JSONCompactEachRow(WithNames/WithNamesAndTypes)`, `JSONCompactStringsEachRow(WithNames/WithNamesAndTypes)`. Support columns mapping and types checking for `RowBinaryWithNamesAndTypes` format. Add setting `input_format_with_types_use_header`, which specifies whether the types written in the `WithNamesAndTypes` format should be checked against the table structure. Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should not control `csv_empty_as_default`). Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields). Fix Nullable input/output in `TSVRaw` format, make this format fully compatible with inserting into TSV. Fix inserting NULLs in `LowCardinality(Nullable)` when `input_format_null_as_default` is enabled (previously default values were inserted instead of actual NULLs). Fix strings deserialization in `JSONStringsEachRow`/`JSONCompactStringsEachRow` formats (strings were parsed just until the first '\n' or '\t'). Add ability to use `Raw` escaping rule in Template input format. Add diagnostic info for `JSONCompactEachRow(WithNames/WithNamesAndTypes)` input format. Fix bug with parallel parsing of `-WithNames` formats in case when setting `min_chunk_bytes_for_parallel_parsing` is less than bytes in a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). Allow to print/parse names and types of columns in `CustomSeparated` input/output format. Add formats `CustomSeparatedWithNames/WithNamesAndTypes` similar to `TSVWithNames/WithNamesAndTypes`. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)).
+* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)).
+* Exposes all settings of the global thread pool in the configuration file. [#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)).
+* Introduced window functions `exponentialTimeDecayedSum`, `exponentialTimeDecayedMax`, `exponentialTimeDecayedCount` and `exponentialTimeDecayedAvg`, which are more effective than `exponentialMovingAverage` for bigger windows. Also more use cases were covered. [#29799](https://github.com/ClickHouse/ClickHouse/pull/29799) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add option to compress logs before writing them to a file using LZ4. Closes [#23860](https://github.com/ClickHouse/ClickHouse/issues/23860). [#29219](https://github.com/ClickHouse/ClickHouse/pull/29219) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Support `JOIN ON 1 = 1`, which has CROSS JOIN semantics. This closes [#25578](https://github.com/ClickHouse/ClickHouse/issues/25578).
[#25894](https://github.com/ClickHouse/ClickHouse/pull/25894) ([Vladimir C](https://github.com/vdimir)).
+* Add Map combinator for `Map` type. - Rename old `sum-, min-, max- Map` for mapped arrays to `sum-, min-, max- MappedArrays`. [#24539](https://github.com/ClickHouse/ClickHouse/pull/24539) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Experimental Feature
+
+* `WINDOW VIEW` to enable stream processing in ClickHouse. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)).
+* Drop support for using Ordinary databases with `MaterializedMySQL`. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)).
+* Implement the commands BACKUP and RESTORE for the Log family. This feature is under development. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Performance Improvement
+
+* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)).
+* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)).
+* Speed up count over nullable columns. [#31806](https://github.com/ClickHouse/ClickHouse/pull/31806) ([Raúl Marín](https://github.com/Algunenano)).
+* Speed up `avg` and `sumCount` aggregate functions. [#31694](https://github.com/ClickHouse/ClickHouse/pull/31694) ([Raúl Marín](https://github.com/Algunenano)).
+* Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of syncing data to block device. This closes [#31181](https://github.com/ClickHouse/ClickHouse/issues/31181). [#31229](https://github.com/ClickHouse/ClickHouse/pull/31229) ([zhanglistar](https://github.com/zhanglistar)).
+* Fix a query performance issue in `LiveView` tables. Fixes [#30831](https://github.com/ClickHouse/ClickHouse/issues/30831). [#31006](https://github.com/ClickHouse/ClickHouse/pull/31006) ([vzakaznikov](https://github.com/vzakaznikov)).
+* Speed up query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional `rule_type` field). [#25122](https://github.com/ClickHouse/ClickHouse/pull/25122) ([Michail Safronov](https://github.com/msaf1980)).
+* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)`, i.e. an identifier as the db.table instead of a string, there was an excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)).
+* Optimize function `tupleElement` to read a subcolumn when the setting `optimize_functions_to_subcolumns` is enabled. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)).
+* Optimize function `mapContains` to read the subcolumn `key` when the setting `optimize_functions_to_subcolumns` is enabled. [#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)).
+* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Skipping mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)).
+
+#### Improvement
+
+* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([tavplubix](https://github.com/tavplubix)).
+* Allow versioning of aggregate function states. Now we can introduce backward compatible changes in serialization format of aggregate function states. Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support PostgreSQL style `ALTER MODIFY COLUMN` syntax. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)).
+* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)).
+* The `murmurHash3_128` and `sipHash128` functions now accept an arbitrary number of arguments. This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)).
+* Support default expression for `HDFS` storage and optimize fetching when the source is column oriented. [#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)).
+* Improve the operation name of an opentelemetry span. [#32234](https://github.com/ClickHouse/ClickHouse/pull/32234) ([Frank Chen](https://github.com/FrankChen021)).
+* Use `Content-Type: application/x-ndjson` (http://ndjson.org/) for output format `JSONEachRow`. [#32223](https://github.com/ClickHouse/ClickHouse/pull/32223) ([Dmitriy Dorofeev](https://github.com/deem0n)).
+* Improve skipping unknown fields with quoted escaping rule in Template/CustomSeparated formats. Previously you could skip only quoted strings, now you can skip values of any type. [#32204](https://github.com/ClickHouse/ClickHouse/pull/32204) ([Kruglov Pavel](https://github.com/Avogar)).
+* Now `clickhouse-keeper` refuses to start or apply configuration changes when they contain duplicated IDs or endpoints. Fixes [#31339](https://github.com/ClickHouse/ClickHouse/issues/31339).
[#32121](https://github.com/ClickHouse/ClickHouse/pull/32121) ([alesapin](https://github.com/alesapin)).
+* Set Content-Type in HTTP packets issued from URL engine. [#32113](https://github.com/ClickHouse/ClickHouse/pull/32113) ([Frank Chen](https://github.com/FrankChen021)).
+* Return Content-Type as 'application/json' for `JSONEachRow` format if `output_format_json_array_of_rows` is enabled. [#32112](https://github.com/ClickHouse/ClickHouse/pull/32112) ([Frank Chen](https://github.com/FrankChen021)).
+* Allow to parse `+` before `Float32`/`Float64` values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow a user-configured `hdfs_replication` parameter for `DiskHDFS` and `StorageHDFS`. Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)).
+* Added ClickHouse `exception` and `exception_code` fields to opentelemetry span log. [#32040](https://github.com/ClickHouse/ClickHouse/pull/32040) ([Frank Chen](https://github.com/FrankChen021)).
+* Improve opentelemetry span log duration - it was zero at the query level if there is a query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)).
+* Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Recreate `system.*_log` tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)).
+* `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)).
+* ClickHouse dictionary source: support predefined connections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow to use predefined connections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Always re-render the prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)) (author: Amos Bird).
+* Add key bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)).
+* Improve the `max_execution_time` checks. Fixed some cases when timeout checks did not happen and a query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)).
+* Better exception message when `users.xml` cannot be loaded due to bad password hash. This closes [#24126](https://github.com/ClickHouse/ClickHouse/issues/24126). [#31557](https://github.com/ClickHouse/ClickHouse/pull/31557) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Use shard and replica name from `Replicated` database arguments when expanding macros in `ReplicatedMergeTree` arguments if these macros are not defined in config. Closes [#31471](https://github.com/ClickHouse/ClickHouse/issues/31471). [#31488](https://github.com/ClickHouse/ClickHouse/pull/31488) ([tavplubix](https://github.com/tavplubix)).
+* Better analysis for `min/max/count` projection. Now, with enabled `allow_experimental_projection_optimization`, a virtual `min/max/count` projection can be used together with columns from the partition key. [#31474](https://github.com/ClickHouse/ClickHouse/pull/31474) ([Amos Bird](https://github.com/amosbird)).
+* Add `--pager` support for `clickhouse-local`. [#31457](https://github.com/ClickHouse/ClickHouse/pull/31457) ([Azat Khuzhin](https://github.com/azat)).
+* Fix waiting for the editor during interactive query editing (`waitpid()` returns -1 on `SIGWINCH`, and `EDITOR` and `clickhouse-local`/`clickhouse-client` work concurrently). [#31456](https://github.com/ClickHouse/ClickHouse/pull/31456) ([Azat Khuzhin](https://github.com/azat)).
+* Throw an exception if there is some garbage after a field in `JSONCompactStrings(EachRow)` format. [#31455](https://github.com/ClickHouse/ClickHouse/pull/31455) ([Kruglov Pavel](https://github.com/Avogar)).
+* Default value of `http_send_timeout` and `http_receive_timeout` settings changed from 1800 (30 minutes) to 180 (3 minutes). [#31450](https://github.com/ClickHouse/ClickHouse/pull/31450) ([tavplubix](https://github.com/tavplubix)).
+* `MaterializedMySQL` now handles `CREATE TABLE ... LIKE ...` DDL queries. [#31410](https://github.com/ClickHouse/ClickHouse/pull/31410) ([Stig Bakken](https://github.com/stigsb)).
+* Return an artificial create query when executing `show create table` on system tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)).
+* Previously progress was shown only for the `numbers` table function. Now it is also shown for `numbers_mt`. [#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* The initial user's roles are now used to find row policies, see [#31080](https://github.com/ClickHouse/ClickHouse/issues/31080). [#31262](https://github.com/ClickHouse/ClickHouse/pull/31262) ([Vitaly Baranov](https://github.com/vitlibar)).
+* If some obsolete setting is changed - show a warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([tavplubix](https://github.com/tavplubix)).
+* Improved backoff for background cleanup tasks in `MergeTree`. Settings `merge_tree_clear_old_temporary_directories_interval_seconds` and `merge_tree_clear_old_parts_interval_seconds` moved from users settings to merge tree settings. [#31180](https://github.com/ClickHouse/ClickHouse/pull/31180) ([tavplubix](https://github.com/tavplubix)).
+* Now every replica will send the client only incremental information about profile event counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). This makes the `--hardware_utilization` option in `clickhouse-client` usable.
+* Enable multiline editing in clickhouse-client by default. This addresses [#31121](https://github.com/ClickHouse/ClickHouse/issues/31121). [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123) ([Amos Bird](https://github.com/amosbird)).
+* Function name normalization for `ALTER` queries.
This helps avoid metadata mismatch between creating a table with indices/projections and adding indices/projections via alter commands. This is a follow-up PR of https://github.com/ClickHouse/ClickHouse/pull/20174. Marked as an improvement as there are no bug reports and the scenario is somewhat rare. [#31095](https://github.com/ClickHouse/ClickHouse/pull/31095) ([Amos Bird](https://github.com/amosbird)).
+* Support `IF EXISTS` modifier for `RENAME DATABASE`/`TABLE`/`DICTIONARY` query. If this directive is used, one will not get an error if the DATABASE/TABLE/DICTIONARY to be renamed doesn't exist. [#31081](https://github.com/ClickHouse/ClickHouse/pull/31081) ([victorgao](https://github.com/kafka1991)).
+* Cancel vertical merges when the partition is dropped. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/25684 and https://github.com/ClickHouse/ClickHouse/pull/30996. [#31057](https://github.com/ClickHouse/ClickHouse/pull/31057) ([Amos Bird](https://github.com/amosbird)).
+* The local session inside a ClickHouse dictionary source won't send its events to the session log anymore. This fixes a possible deadlock (tsan alert) on shutdown. Also this PR fixes flaky `test_dictionaries_dependency_xml/`. [#31013](https://github.com/ClickHouse/ClickHouse/pull/31013) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Less locking in ALTER command. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)).
+* Fix `--verbose` option in clickhouse-local interactive mode and allow logging into a file. [#30881](https://github.com/ClickHouse/ClickHouse/pull/30881) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added `\l`, `\d`, `\c` commands in `clickhouse-client` like in MySQL and PostgreSQL. [#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)).
+* For clickhouse-local or clickhouse-client: if there is an `--interactive` option with `--query` or `--queries-file`, then first execute them as in non-interactive mode and then start interactive mode. [#30851](https://github.com/ClickHouse/ClickHouse/pull/30851) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error (if DROP fails during removing znodes from zookeeper). [#30826](https://github.com/ClickHouse/ClickHouse/pull/30826) ([Azat Khuzhin](https://github.com/azat)).
+* Avro format works against Kafka. Setting `output_format_avro_rows_in_file` added. [#30351](https://github.com/ClickHouse/ClickHouse/pull/30351) ([Ilya Golshtein](https://github.com/ilejn)).
+* Allow to specify one or any number of PostgreSQL schemas for one `MaterializedPostgreSQL` database. Closes [#28901](https://github.com/ClickHouse/ClickHouse/issues/28901). Closes [#29324](https://github.com/ClickHouse/ClickHouse/issues/29324). [#28933](https://github.com/ClickHouse/ClickHouse/pull/28933) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Changed the default port for clickhouse-keeper internal communication from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). [#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)).
+* Implement function transform with Decimal arguments. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([李帅](https://github.com/loneylee)).
+* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of a bad HDFS URL, by adding an additional check of the HDFS URL structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Bug Fixes
+
+* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix bug when removing unneeded columns in a subquery. If there is an aggregation function in a query without group by, do not remove it even if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)).
+* Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)).
+* Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)).
+* Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when the data type of the default expression is not equal to the data type of the column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([tavplubix](https://github.com/tavplubix)).
+* Server might fail to start with `Cannot attach 1 tables due to cyclic dependencies` error if a `Dictionary` table looks at an XML dictionary with the same name; it's fixed. Fixes [#31315](https://github.com/ClickHouse/ClickHouse/issues/31315).
[#32288](https://github.com/ClickHouse/ClickHouse/pull/32288) ([tavplubix](https://github.com/tavplubix)).
+* Fix parsing error when deserializing NaN for `Nullable(Float)` with the `Quoted` escaping rule. [#32190](https://github.com/ClickHouse/ClickHouse/pull/32190) ([Kruglov Pavel](https://github.com/Avogar)).
+* XML dictionaries: identifiers used in the table create query can be qualified with `default_database` during an upgrade to a newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)).
+* Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([tavplubix](https://github.com/tavplubix)).
+* Dictionaries: fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix `CAST` from `Nullable` with `cast_keep_nullable` (previously a `PARAMETER_OUT_OF_BOUND` error for e.g. `toUInt32OrDefault(toNullable(toUInt32(1)))`). [#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)).
+* Fix CREATE TABLE of Join Storage in some obscure cases. Closes [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)).
+* Fixed `Directory ... already exists and is not empty` error when detaching a part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([tavplubix](https://github.com/tavplubix)).
+* `MaterializedMySQL` (experimental feature): Fix misinterpretation of `DECIMAL` data from MySQL. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)).
+* The `FileLog` (experimental feature) engine unnecessarily created a metadata directory when creating the table failed. Fixes [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)).
+* Some `GET_PART` entry might hang in the replication queue if a part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when the partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([tavplubix](https://github.com/tavplubix)).
+* Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)).
+* Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler`. Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)).
+* Fix invalid cast of Nullable type when a nullable primary key is used. (Nullable primary key is a discouraged feature - please do not use it). This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075).
[#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)).
+* Fix crash in recursive UDF in SQL. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix crash when function `dictGet` with a specified type is used for a dictionary attribute of `Nullable` type. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix crash with empty result of ODBC query (with some ODBC drivers). Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix disabling query profiler (in case of `query_profiler_real_time_period_ns>0`/`query_profiler_cpu_time_period_ns>0` the query profiler could stay enabled even after the query finished). [#31740](https://github.com/ClickHouse/ClickHouse/pull/31740) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([tavplubix](https://github.com/tavplubix)).
+* Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if the specified cluster name is the name of a `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([tavplubix](https://github.com/tavplubix)).
+* Fix exception on some of the applications of the `decrypt` function on Nullable columns. This closes [#31662](https://github.com/ClickHouse/ClickHouse/issues/31662). This closes [#31426](https://github.com/ClickHouse/ClickHouse/issues/31426). [#31707](https://github.com/ClickHouse/ClickHouse/pull/31707) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed function ngrams when a string contains UTF-8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)).
+* Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`; it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([tavplubix](https://github.com/tavplubix)).
+* Fixed null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* `RENAME TABLE` query worked incorrectly on an attempt to rename a DDL dictionary in an `Ordinary` database; it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([tavplubix](https://github.com/tavplubix)).
+* Implement `sparkbar` aggregate function as it was intended, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)).
+* Fix invalid generated JSON when only column names contain invalid UTF-8 sequences.
[#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Disable `partial_merge_join_left_table_buffer_bytes` until the bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)).
+* Fix progress for short `INSERT SELECT` queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)).
+* Fix wrong behavior with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Remove `notLike` function from index analysis, because it was wrong. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)).
+* Fix bug in Keeper which could lead to inability to start when some coordination logs were lost and we have a fresher snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)).
+* Rewrite right distributed table in local join. Solves [#25809](https://github.com/ClickHouse/ClickHouse/issues/25809). [#31105](https://github.com/ClickHouse/ClickHouse/pull/31105) ([abel-cheng](https://github.com/abel-cheng)).
+* Fix `Merge` table with aliases and where (it did not work at all before). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix JSON_VALUE/JSON_QUERY with quoted identifiers. This allows having spaces in the JSON path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Using the `formatRow` function with non-row-oriented formats led to a segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix bug which broke select queries if they happened after dropping a materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Skip the `max_partition_size_to_drop` check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)).
+* Fix some corner cases with `INTERSECT` and `EXCEPT` operators. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Build/Testing/Packaging Improvement
+
+* Fix incorrect filtering result on non-x86 builds. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524).
[#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Make ClickHouse build fully reproducible (byte identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([alexey-milovidov](https://github.com/alexey-milovidov)). Remove the filesystem path to the build directory from binaries to enable reproducible builds. This is needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Use our own CMakeLists for `zlib-ng`, `cassandra`, `mariadb-connector-c` and `xz`, `re2`, `sentry`, `gsasl`, `arrow`, `protobuf`. This is needed for [#20151](https://github.com/ClickHouse/ClickHouse/issues/20151). Part of [#9226](https://github.com/ClickHouse/ClickHouse/issues/9226). A small step towards removal of annoying trash from the build system. [#30599](https://github.com/ClickHouse/ClickHouse/pull/30599) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Hermetic builds: use a fixed version of libc and make sure that no source or binary files from the host OS are used during build. This closes [#27133](https://github.com/ClickHouse/ClickHouse/issues/27133). This closes [#21435](https://github.com/ClickHouse/ClickHouse/issues/21435). This closes [#30462](https://github.com/ClickHouse/ClickHouse/issues/30462). [#30011](https://github.com/ClickHouse/ClickHouse/pull/30011) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Adding function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227). [#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)).
+* More correct setting up of capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)).
+* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)).
+* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Initial support for RISC-V. See development/build-cross-riscv for quirks and the build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)).
+* Support compiling on ARM machines with the parameter "-DENABLE_TESTS=OFF". [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)).
+
+
+### ClickHouse release v21.11, 2021-11-09
+
+#### Backward Incompatible Change
+
+* Change the order of the json_path and json arguments in SQL/JSON functions (to be consistent with the standard). Closes [#30449](https://github.com/ClickHouse/ClickHouse/issues/30449). [#30474](https://github.com/ClickHouse/ClickHouse/pull/30474) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Remove `MergeTree` table setting `write_final_mark`. It will always be `true`. [#30455](https://github.com/ClickHouse/ClickHouse/pull/30455) ([Kseniia Sumarokova](https://github.com/kssenii)). No actions required; all tables are compatible with the new version.
+* Function `bayesAB` is removed. Please help to return this function back, refreshed. This closes [#26233](https://github.com/ClickHouse/ClickHouse/issues/26233). [#29934](https://github.com/ClickHouse/ClickHouse/pull/29934) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* This is relevant only if you already started using the experimental `clickhouse-keeper` support. Now ClickHouse Keeper snapshots are compressed with the `ZSTD` codec by default instead of custom ClickHouse LZ4 block compression. This behavior can be turned off with the `compress_snapshots_with_zstd_format` coordination setting (must be equal on all quorum replicas). Backward incompatibility is quite rare and may happen only when a new node sends a snapshot (happens in case of recovery) to an old node which is unable to read snapshots in ZSTD format. [#29417](https://github.com/ClickHouse/ClickHouse/pull/29417) ([alesapin](https://github.com/alesapin)).
+
+#### New Feature
+
+* New asynchronous INSERT mode allows accumulating inserted data and storing it in a single batch in the background. On the client it can be enabled by the setting `async_insert` for `INSERT` queries with data inlined in the query or in a separate buffer (e.g. for `INSERT` queries via HTTP protocol). If `wait_for_async_insert` is true (the default), the client will wait until the data is flushed to the table. On the server side it is controlled by the settings `async_insert_threads`, `async_insert_max_data_size` and `async_insert_busy_timeout_ms`. Implements [#18282](https://github.com/ClickHouse/ClickHouse/issues/18282). [#27537](https://github.com/ClickHouse/ClickHouse/pull/27537) ([Anton Popov](https://github.com/CurtizJ)). [#20557](https://github.com/ClickHouse/ClickHouse/pull/20557) ([Ivan](https://github.com/abyss7)). Notes on performance: with asynchronous inserts you can do up to around 10 000 individual INSERT queries per second, so it is still recommended to insert in batches if you want to achieve performance of up to millions of inserted rows per second.
+* Add interactive mode for `clickhouse-local`. So, you can just run `clickhouse-local` to get a command line ClickHouse interface without connecting to a server and process data from files and external data sources. Also merge the code of `clickhouse-client` and `clickhouse-local` together. Closes [#7203](https://github.com/ClickHouse/ClickHouse/issues/7203). Closes [#25516](https://github.com/ClickHouse/ClickHouse/issues/25516). Closes [#22401](https://github.com/ClickHouse/ClickHouse/issues/22401). [#26231](https://github.com/ClickHouse/ClickHouse/pull/26231) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added support for executable (scriptable) user defined functions. These are UDFs that can be written in any programming language. [#28803](https://github.com/ClickHouse/ClickHouse/pull/28803) ([Maksim Kita](https://github.com/kitaisreal)).
+* Allow predefined connections to external data sources. This avoids specifying credentials or addresses while using external data sources; they can be referenced by name instead. Closes [#28367](https://github.com/ClickHouse/ClickHouse/issues/28367). [#28577](https://github.com/ClickHouse/ClickHouse/pull/28577) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added `INFORMATION_SCHEMA` database with `SCHEMATA`, `TABLES`, `VIEWS` and `COLUMNS` views mapping to the corresponding tables in the `system` database. Closes [#9770](https://github.com/ClickHouse/ClickHouse/issues/9770). [#28691](https://github.com/ClickHouse/ClickHouse/pull/28691) ([tavplubix](https://github.com/tavplubix)).
+* Support `EXISTS (subquery)`. Closes [#6852](https://github.com/ClickHouse/ClickHouse/issues/6852). [#29731](https://github.com/ClickHouse/ClickHouse/pull/29731) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Session logging for audit. Logging all successful and failed login and logout events to a new `system.session_log` table. [#22415](https://github.com/ClickHouse/ClickHouse/pull/22415) ([Vasily Nemkov](https://github.com/Enmk)) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Support multidimensional cosine distance and Euclidean distance functions; L1, L2, Lp, Linf distances and norms. Scalar product on tuples and various arithmetic operators on tuples. This fully closes [#4509](https://github.com/ClickHouse/ClickHouse/issues/4509) and even more. [#27933](https://github.com/ClickHouse/ClickHouse/pull/27933) ([Alexey Boykov](https://github.com/mathalex)).
+* Add support for compression and decompression for `INTO OUTFILE` and `FROM INFILE` (with autodetect or with an additional optional parameter). [#27135](https://github.com/ClickHouse/ClickHouse/pull/27135) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Add CORS (Cross Origin Resource Sharing) support with HTTP `OPTIONS` request. It means that now Grafana will work with serverless requests without kludges. Closes [#18693](https://github.com/ClickHouse/ClickHouse/issues/18693). [#29155](https://github.com/ClickHouse/ClickHouse/pull/29155) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Queries with JOIN ON now support disjunctions (OR). [#21320](https://github.com/ClickHouse/ClickHouse/pull/21320) ([Ilya Golshtein](https://github.com/ilejn)).
+* Added function `tokens`. It allows splitting a string into tokens using non-alphanumeric ASCII characters as separators. [#29981](https://github.com/ClickHouse/ClickHouse/pull/29981) ([Maksim Kita](https://github.com/kitaisreal)). Added function `ngrams` to extract ngrams from text. Closes [#29699](https://github.com/ClickHouse/ClickHouse/issues/29699). [#29738](https://github.com/ClickHouse/ClickHouse/pull/29738) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add functions for Unicode normalization: `normalizeUTF8NFC`, `normalizeUTF8NFD`, `normalizeUTF8NFKC`, `normalizeUTF8NFKD` functions. [#28633](https://github.com/ClickHouse/ClickHouse/pull/28633) ([darkkeks](https://github.com/darkkeks)).
+* Streaming consumption of application log files in ClickHouse with the `FileLog` table engine. It's like the `Kafka` or `RabbitMQ` engine but for append-only and rotated logs in the local filesystem. Closes [#6953](https://github.com/ClickHouse/ClickHouse/issues/6953). [#25969](https://github.com/ClickHouse/ClickHouse/pull/25969) ([flynn](https://github.com/ucasfl)) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add `CapnProto` output format, refactor `CapnProto` input format. [#29291](https://github.com/ClickHouse/ClickHouse/pull/29291) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow writing numbers in queries as binary literals. Example `SELECT 0b001;`. [#29304](https://github.com/ClickHouse/ClickHouse/pull/29304) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added `hashed_array` dictionary type. It saves memory when using dictionaries with multiple attributes. Closes [#30236](https://github.com/ClickHouse/ClickHouse/issues/30236). [#30242](https://github.com/ClickHouse/ClickHouse/pull/30242) ([Maksim Kita](https://github.com/kitaisreal)).
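+
+For the `tokens`, `ngrams`, and binary-literal entries above, a small illustrative query (values made up for the example):
+
+```sql
+SELECT
+    tokens('Hello, ClickHouse!') AS parts,  -- ['Hello', 'ClickHouse']
+    ngrams('ABCD', 3) AS trigrams,          -- ['ABC', 'BCD']
+    0b101 AS binary_literal;                -- 5
+```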
+* Added `JSONExtractKeys` function. [#30056](https://github.com/ClickHouse/ClickHouse/pull/30056) ([Vitaly](https://github.com/orloffv)).
+* Add a function `getOSKernelVersion` - it returns a string with the OS kernel version. [#29755](https://github.com/ClickHouse/ClickHouse/pull/29755) ([Memo](https://github.com/Joeywzr)).
+* Added `MD4` and `SHA384` functions. MD4 is an obsolete and insecure hash function; it can be used only in rare cases when MD4 is already being used in some legacy system and you need to get exactly the same result. [#29602](https://github.com/ClickHouse/ClickHouse/pull/29602) ([Nikita Tikhomirov](https://github.com/NSTikhomirov)).
+* HSTS can be enabled for the ClickHouse HTTP server by setting `hsts_max_age` in the configuration file to a positive number. [#29516](https://github.com/ClickHouse/ClickHouse/pull/29516) ([凌涛](https://github.com/lingtaolf)).
+* Huawei OBS Storage support. Closes [#24294](https://github.com/ClickHouse/ClickHouse/issues/24294). [#29511](https://github.com/ClickHouse/ClickHouse/pull/29511) ([kevin wan](https://github.com/MaxWk)).
+* New function `mapContainsKeyLike` to check whether a map contains a key matching a simple regular expression. [#29471](https://github.com/ClickHouse/ClickHouse/pull/29471) ([凌涛](https://github.com/lingtaolf)). New function `mapExtractKeyLike` to get a submap containing only the elements whose key matches the specified pattern. [#30793](https://github.com/ClickHouse/ClickHouse/pull/30793) ([凌涛](https://github.com/lingtaolf)).
+* Implemented `ALTER TABLE x MODIFY COMMENT`. [#29264](https://github.com/ClickHouse/ClickHouse/pull/29264) ([Vasily Nemkov](https://github.com/Enmk)).
+* Adds H3 inspection functions that are missing from ClickHouse but are available via the H3 api: https://h3geo.org/docs/api/inspection. [#29209](https://github.com/ClickHouse/ClickHouse/pull/29209) ([Bharat Nallan](https://github.com/bharatnc)).
+* Allow non-replicated ALTER TABLE FETCH and ATTACH in Replicated databases. [#29202](https://github.com/ClickHouse/ClickHouse/pull/29202) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Added a setting `output_format_csv_null_representation`: this is the same as `output_format_tsv_null_representation` but is for CSV output. [#29123](https://github.com/ClickHouse/ClickHouse/pull/29123) ([PHO](https://github.com/depressed-pho)).
+* Added function `zookeeperSessionUptime()` which returns the uptime of the current ZooKeeper session in seconds. [#28983](https://github.com/ClickHouse/ClickHouse/pull/28983) ([tavplubix](https://github.com/tavplubix)).
+* Implements the `h3ToGeoBoundary` function. [#28952](https://github.com/ClickHouse/ClickHouse/pull/28952) ([Ivan Veselov](https://github.com/fuzzERot)).
+* Add aggregate function `exponentialMovingAverage` that can be used as a window function. This closes [#27511](https://github.com/ClickHouse/ClickHouse/issues/27511). [#28914](https://github.com/ClickHouse/ClickHouse/pull/28914) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow including subcolumns of table columns into `DESCRIBE` query results (can be enabled by the setting `describe_include_subcolumns`). [#28905](https://github.com/ClickHouse/ClickHouse/pull/28905) ([Anton Popov](https://github.com/CurtizJ)).
+* `Executable` and `ExecutablePool` got the option `send_chunk_header`. If this option is true, the number of rows in the chunk followed by a line break is sent to the client before each chunk. [#28833](https://github.com/ClickHouse/ClickHouse/pull/28833) ([Maksim Kita](https://github.com/kitaisreal)).
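+
+A minimal illustration of the `mapContainsKeyLike`/`mapExtractKeyLike` entries above, with made-up map contents:
+
+```sql
+SELECT
+    mapContainsKeyLike(map('abc', 1, 'def', 2), 'a%') AS has_match,  -- 1
+    mapExtractKeyLike(map('abc', 1, 'def', 2), 'a%') AS submap;      -- {'abc': 1}
+```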
+* `tokenbf_v1` and `ngrambf_v1` indexes support `Map` with a key of `String` or `FixedString` type. This enhances data skipping in queries with a map key filter. ```sql CREATE TABLE map_tokenbf ( row_id UInt32, map Map(String, String), INDEX map_tokenbf map TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1 ) Engine=MergeTree() Order by row_id ``` With the table above, the query `select * from map_tokenbf where map['K']='V'` will skip granules that don't contain the key `K`. Of course, how many rows are skipped depends on the `granularity` and `index_granularity` you set. [#28511](https://github.com/ClickHouse/ClickHouse/pull/28511) ([凌涛](https://github.com/lingtaolf)).
+* Send profile events from server to client. A new packet type `ProfileEvents` was introduced. Closes [#26177](https://github.com/ClickHouse/ClickHouse/issues/26177). [#28364](https://github.com/ClickHouse/ClickHouse/pull/28364) ([Dmitry Novik](https://github.com/novikd)).
+* Bit shift operations for `FixedString` and `String` data types. This closes [#27763](https://github.com/ClickHouse/ClickHouse/issues/27763). [#28325](https://github.com/ClickHouse/ClickHouse/pull/28325) ([小路](https://github.com/nicelulu)).
+* Support dynamically adding/deleting tables to/from replication from PostgreSQL in the database engine MaterializedPostgreSQL. Support alter for database settings. Closes [#27573](https://github.com/ClickHouse/ClickHouse/issues/27573). [#28301](https://github.com/ClickHouse/ClickHouse/pull/28301) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added function `accurateCastOrDefault(x, T)`. Closes [#21330](https://github.com/ClickHouse/ClickHouse/issues/21330). Authors @taiyang-li. [#23028](https://github.com/ClickHouse/ClickHouse/pull/23028) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added functions `toUUIDOrDefault`, `toUInt8/16/32/64/256OrDefault`, `toInt8/16/32/64/128/256OrDefault`, which let the user define a default (non-NULL) value to return when string parsing fails. [#21330](https://github.com/ClickHouse/ClickHouse/pull/21330) ([taiyang-li](https://github.com/taiyang-li)).
+
+#### Performance Improvement
+
+* Background merges can be preempted by each other and they are scheduled with appropriate priorities. Now long-running merges won't prevent short merges from proceeding. This is needed for better scheduling and control of merge execution. It reduces the chances of getting the "too many parts" error. [#22381](https://github.com/ClickHouse/ClickHouse/issues/22381). [#25165](https://github.com/ClickHouse/ClickHouse/pull/25165) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Added an ability to execute more merges and mutations than the number of threads in the background pool. Merges and mutations will be executed step by step according to their sizes (lower is more prioritized). The ratio of the number of tasks to threads to execute is controlled by the setting `background_merges_mutations_concurrency_ratio`, 2 by default. [#29140](https://github.com/ClickHouse/ClickHouse/pull/29140) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Allow using asynchronous reads for remote filesystems. Lower the number of seeks while reading from remote filesystems. It improves performance tremendously and makes the experimental `web` and `s3` disks work faster than EBS under certain conditions. [#29205](https://github.com/ClickHouse/ClickHouse/pull/29205) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, the `web` disk type (static dataset hosted on a web server) has graduated from experimental to production-ready.
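+
+A quick illustration of the `*OrDefault` cast functions above, with values chosen for the example:
+
+```sql
+SELECT
+    accurateCastOrDefault('oops', 'UInt8') AS a,    -- 0, the type's default
+    toUInt32OrDefault('oops', toUInt32(42)) AS b;   -- 42, the explicit default
+```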
+* Queries with `INTO OUTFILE` in `clickhouse-client` will use multiple threads. Fix the issue with a flickering progress-bar when using `INTO OUTFILE`. This closes [#30873](https://github.com/ClickHouse/ClickHouse/issues/30873). This closes [#30872](https://github.com/ClickHouse/ClickHouse/issues/30872). [#30886](https://github.com/ClickHouse/ClickHouse/pull/30886) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Reduce the amount of redundant compressed data read from disk for some types of `SELECT` queries (only for the `MergeTree` engines family). [#30111](https://github.com/ClickHouse/ClickHouse/pull/30111) ([alesapin](https://github.com/alesapin)).
+* Remove some redundant `seek` calls while reading compressed blocks in the MergeTree table engines family. [#29766](https://github.com/ClickHouse/ClickHouse/pull/29766) ([alesapin](https://github.com/alesapin)).
+* Make the `url` table function process multiple URLs in parallel. This closes [#29670](https://github.com/ClickHouse/ClickHouse/issues/29670) and closes [#29671](https://github.com/ClickHouse/ClickHouse/issues/29671). [#29673](https://github.com/ClickHouse/ClickHouse/pull/29673) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of aggregation in order of primary key (with the setting `optimize_aggregation_in_order` enabled). [#30266](https://github.com/ClickHouse/ClickHouse/pull/30266) ([Anton Popov](https://github.com/CurtizJ)).
+* Now ClickHouse uses a DNS cache while communicating with external S3. [#29999](https://github.com/ClickHouse/ClickHouse/pull/29999) ([alesapin](https://github.com/alesapin)).
+* Add support for pushdown of `IS NULL`/`IS NOT NULL` to external databases (e.g. MySQL). [#29463](https://github.com/ClickHouse/ClickHouse/pull/29463) ([Azat Khuzhin](https://github.com/azat)). Transform `isNull`/`isNotNull` to `IS NULL`/`IS NOT NULL` (for external dbs, e.g. MySQL). [#29446](https://github.com/ClickHouse/ClickHouse/pull/29446) ([Azat Khuzhin](https://github.com/azat)).
+* SELECT queries from Dictionary tables will use multiple threads. [#30500](https://github.com/ClickHouse/ClickHouse/pull/30500) ([Maksim Kita](https://github.com/kitaisreal)).
+* Improve performance for filtering (WHERE operation) of `Decimal` columns. [#30431](https://github.com/ClickHouse/ClickHouse/pull/30431) ([Jun Jin](https://github.com/vesslanjin)).
+* Replace branchy code in the filter operation with a better implementation using popcnt/ctz, which has better performance. [#29881](https://github.com/ClickHouse/ClickHouse/pull/29881) ([Jun Jin](https://github.com/vesslanjin)).
+* Improve the filter bytemask generator (used for the WHERE operator) with SSE/AVX2/AVX512 instructions. Note that by default ClickHouse is only using SSE, so it's only relevant for custom builds. [#30014](https://github.com/ClickHouse/ClickHouse/pull/30014) ([jasperzhu](https://github.com/jinjunzh)). [#30670](https://github.com/ClickHouse/ClickHouse/pull/30670) ([jasperzhu](https://github.com/jinjunzh)).
+* Improve the performance of the SUM aggregate function over Nullable floating point numbers. [#28906](https://github.com/ClickHouse/ClickHouse/pull/28906) ([Raúl Marín](https://github.com/Algunenano)).
+* Speed up the part loading process when multiple disks are in use. The idea is similar to https://github.com/ClickHouse/ClickHouse/pull/16423 . A production environment shows an improvement: 24 min -> 16 min. [#28363](https://github.com/ClickHouse/ClickHouse/pull/28363) ([Amos Bird](https://github.com/amosbird)).
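+
+For the `url` table function entry above, a sketch of a query that fans out over several URLs; the endpoint is made up, and this assumes `{..}` range expansion as used with the linked PRs:
+
+```sql
+-- {1..4} expands to four URLs that can now be fetched in parallel.
+SELECT count()
+FROM url('https://example.com/part_{1..4}.csv', 'CSV', 'id UInt64, value String');
+```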
+* Reduce default settings for S3 multipart upload part size to lower memory usage. [#28679](https://github.com/ClickHouse/ClickHouse/pull/28679) ([ianton-ru](https://github.com/ianton-ru)).
+* Speed up `bitmapAnd` function. [#28332](https://github.com/ClickHouse/ClickHouse/pull/28332) ([dddounaiking](https://github.com/OodounaikingoO)).
+* Removed sub-optimal mutation notifications in `StorageMergeTree` when merges are still in progress. [#27552](https://github.com/ClickHouse/ClickHouse/pull/27552) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Attempt to improve performance of string comparison. [#28767](https://github.com/ClickHouse/ClickHouse/pull/28767) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Primary key index and partition filter can now work with tuples. [#29281](https://github.com/ClickHouse/ClickHouse/pull/29281) ([凌涛](https://github.com/lingtaolf)).
+* If a query has multiple quantile aggregate functions with the same arguments but a different level parameter, they will be fused together and executed in one pass if the setting `optimize_syntax_fuse_functions` is enabled. [#26657](https://github.com/ClickHouse/ClickHouse/pull/26657) ([hexiaoting](https://github.com/hexiaoting)).
+* Now min-max aggregation over the first expression of the primary key is optimized by projection. This is for [#329](https://github.com/ClickHouse/ClickHouse/issues/329). [#29918](https://github.com/ClickHouse/ClickHouse/pull/29918) ([Amos Bird](https://github.com/amosbird)).
+
+#### Experimental Feature
+
+* Add ability to change nodes configuration (in `.xml` file) for ClickHouse Keeper. [#30372](https://github.com/ClickHouse/ClickHouse/pull/30372) ([alesapin](https://github.com/alesapin)).
+* Add `sparkbar` aggregate function. This closes [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175). [#27481](https://github.com/ClickHouse/ClickHouse/pull/27481) ([小路](https://github.com/nicelulu)). Note: there is one flaw in this function; the behaviour will be changed in future releases.
+
+#### Improvement
+
+* Allow users to change log levels without a restart. [#29586](https://github.com/ClickHouse/ClickHouse/pull/29586) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Multiple improvements for SQL UDF. Queries for manipulation of SQL User Defined Functions now support the ON CLUSTER clause. Example `CREATE FUNCTION test_function ON CLUSTER 'cluster' AS x -> x + 1;`. Closes [#30666](https://github.com/ClickHouse/ClickHouse/issues/30666). [#30734](https://github.com/ClickHouse/ClickHouse/pull/30734) ([Maksim Kita](https://github.com/kitaisreal)). Support `CREATE OR REPLACE`, `CREATE IF NOT EXISTS` syntaxes. [#30454](https://github.com/ClickHouse/ClickHouse/pull/30454) ([Maksim Kita](https://github.com/kitaisreal)). Added DROP IF EXISTS support. Example `DROP FUNCTION IF EXISTS test_function`. [#30437](https://github.com/ClickHouse/ClickHouse/pull/30437) ([Maksim Kita](https://github.com/kitaisreal)). Support lambdas. Example `CREATE FUNCTION lambda_function AS x -> arrayMap(element -> element * 2, x);`. [#30435](https://github.com/ClickHouse/ClickHouse/pull/30435) ([Maksim Kita](https://github.com/kitaisreal)). Support SQL user defined functions for `clickhouse-local`. [#30179](https://github.com/ClickHouse/ClickHouse/pull/30179) ([Maksim Kita](https://github.com/kitaisreal)).
+* Enable per-query memory profiler (set `memory_profiler_step` = 4MiB) globally. [#29455](https://github.com/ClickHouse/ClickHouse/pull/29455) ([Azat Khuzhin](https://github.com/azat)).
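+
+A sketch of the quantile fusion mentioned above, assuming the setting behaves as described in the linked PR:
+
+```sql
+SET optimize_syntax_fuse_functions = 1;
+
+-- These three calls share the argument x and can be fused into a single
+-- quantiles(0.5, 0.9, 0.99)(x) computation, scanning the data once.
+SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.99)(x)
+FROM (SELECT number AS x FROM numbers(1000000));
+```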
+* Added columns `data_compressed_bytes`, `data_uncompressed_bytes`, `marks_bytes` into `system.data_skipping_indices`. Added columns `secondary_indices_compressed_bytes`, `secondary_indices_uncompressed_bytes`, `secondary_indices_marks_bytes` into `system.parts`. Closes [#29697](https://github.com/ClickHouse/ClickHouse/issues/29697). [#29896](https://github.com/ClickHouse/ClickHouse/pull/29896) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add a `table` alias to `system.tables` and a `database` alias to `system.databases`. [#29677](https://github.com/ClickHouse/ClickHouse/issues/29677). [#29882](https://github.com/ClickHouse/ClickHouse/pull/29882) ([kevin wan](https://github.com/MaxWk)).
+* Correctly resolve interdependencies between tables on server startup. Closes [#8004](https://github.com/ClickHouse/ClickHouse/issues/8004), closes [#15170](https://github.com/ClickHouse/ClickHouse/issues/15170). [#28373](https://github.com/ClickHouse/ClickHouse/pull/28373) ([tavplubix](https://github.com/tavplubix)).
+* Avoid the error "Division by zero" when the denominator is Nullable in functions `divide`, `intDiv` and `modulo`. Closes [#22621](https://github.com/ClickHouse/ClickHouse/issues/22621). [#28352](https://github.com/ClickHouse/ClickHouse/pull/28352) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow parsing values of the `Date` data type in text formats as `YYYYMMDD` in addition to `YYYY-MM-DD`. This closes [#30870](https://github.com/ClickHouse/ClickHouse/issues/30870). [#30871](https://github.com/ClickHouse/ClickHouse/pull/30871) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Web UI: render bars in table cells. [#29792](https://github.com/ClickHouse/ClickHouse/pull/29792) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Users can now create dictionaries with comments: `CREATE DICTIONARY ... COMMENT 'value'`. [#29899](https://github.com/ClickHouse/ClickHouse/pull/29899) ([Vasily Nemkov](https://github.com/Enmk)). Users can now set comments on databases in the `CREATE DATABASE` statement. [#29429](https://github.com/ClickHouse/ClickHouse/pull/29429) ([Vasily Nemkov](https://github.com/Enmk)).
+* Introduce `compiled_expression_cache_elements_size` setting. If you ever want to use this setting, you will already know what it does. [#30667](https://github.com/ClickHouse/ClickHouse/pull/30667) ([Maksim Kita](https://github.com/kitaisreal)).
+* clickhouse-format now supports the option `--query`. In previous versions you had to pass the query via stdin. [#29325](https://github.com/ClickHouse/ClickHouse/pull/29325) ([凌涛](https://github.com/lingtaolf)).
+* Support `ALTER TABLE` for tables in `Memory` databases. Memory databases are used in `clickhouse-local`. [#30866](https://github.com/ClickHouse/ClickHouse/pull/30866) ([tavplubix](https://github.com/tavplubix)).
+* Arrays of all serializable types are now supported by `arrayStringConcat`. [#30840](https://github.com/ClickHouse/ClickHouse/pull/30840) ([Nickita Taranov](https://github.com/nickitat)).
+* ClickHouse will now account for Docker/cgroups limits when determining the amount of system memory. See [#25662](https://github.com/ClickHouse/ClickHouse/issues/25662). [#30574](https://github.com/ClickHouse/ClickHouse/pull/30574) ([Pavel Medvedev](https://github.com/pmed)).
+* Fetched table structure for PostgreSQL databases is more reliable now. [#30477](https://github.com/ClickHouse/ClickHouse/pull/30477) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Full support of positional arguments in GROUP BY and ORDER BY. [#30433](https://github.com/ClickHouse/ClickHouse/pull/30433) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow extracting a non-string element as a string using `JSONExtractString`. This is for [pull/25452#issuecomment-927123287](https://github.com/ClickHouse/ClickHouse/pull/25452#issuecomment-927123287). [#30426](https://github.com/ClickHouse/ClickHouse/pull/30426) ([Amos Bird](https://github.com/amosbird)).
+* Added an ability to use the FINAL clause in SELECT queries from `GraphiteMergeTree`. [#30360](https://github.com/ClickHouse/ClickHouse/pull/30360) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Minor improvements in replica cloning and enqueuing fetch for broken parts, which should avoid extremely rare hanging of `GET_PART` entries in the replication queue. [#30346](https://github.com/ClickHouse/ClickHouse/pull/30346) ([tavplubix](https://github.com/tavplubix)).
+* Allow symlinks to files in the `user_files` directory for the `file` table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed comparison of `Date32` with `Date`, `DateTime`, `DateTime64` and `String`. [#30219](https://github.com/ClickHouse/ClickHouse/pull/30219) ([liang.huang](https://github.com/lhuang09287750)).
+* Allow removing the `SAMPLE BY` expression from `MergeTree` tables (`ALTER TABLE ... REMOVE SAMPLE BY`). [#30180](https://github.com/ClickHouse/ClickHouse/pull/30180) ([Anton Popov](https://github.com/CurtizJ)).
+* Now `Keeper` (as part of `clickhouse-server`) will start asynchronously if it can connect to some other node. [#30170](https://github.com/ClickHouse/ClickHouse/pull/30170) ([alesapin](https://github.com/alesapin)).
+* Now `clickhouse-client` supports native multi-line editing. [#30143](https://github.com/ClickHouse/ClickHouse/pull/30143) ([Amos Bird](https://github.com/amosbird)).
+* `polygon` dictionaries (reverse geocoding): added support for reading the dictionary content with a SELECT query if the setting `store_polygon_key_column` = true. Closes [#30090](https://github.com/ClickHouse/ClickHouse/issues/30090). [#30142](https://github.com/ClickHouse/ClickHouse/pull/30142) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add ClickHouse logo to Play UI. [#29674](https://github.com/ClickHouse/ClickHouse/pull/29674) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Better exception message while reading columns from Arrow-supported formats like `Arrow`, `ArrowStream`, `Parquet` and `ORC`. This closes [#29926](https://github.com/ClickHouse/ClickHouse/issues/29926). [#29927](https://github.com/ClickHouse/ClickHouse/pull/29927) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix data-race between flush and startup in `Buffer` tables. This can appear in tests. [#29930](https://github.com/ClickHouse/ClickHouse/pull/29930) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `lock-order-inversion` between `DROP TABLE` for `DatabaseMemory` and `LiveView`. Live View is an experimental feature. Memory database is used in clickhouse-local. [#29929](https://github.com/ClickHouse/ClickHouse/pull/29929) ([Azat Khuzhin](https://github.com/azat)).
+* Fix lock-order-inversion between periodic dictionary reload and config reload. [#29928](https://github.com/ClickHouse/ClickHouse/pull/29928) ([Azat Khuzhin](https://github.com/azat)).
+* Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add ability to configure retries and delays between them for `clickhouse-copier`. [#29921](https://github.com/ClickHouse/ClickHouse/pull/29921) ([Azat Khuzhin](https://github.com/azat)).
+* Add `shutdown_wait_unfinished_queries` server setting to allow waiting for running queries up to `shutdown_wait_unfinished` time. This is for [#24451](https://github.com/ClickHouse/ClickHouse/issues/24451). [#29914](https://github.com/ClickHouse/ClickHouse/pull/29914) ([Amos Bird](https://github.com/amosbird)).
+* Add ability to trace peak memory usage (with a new trace_type in `system.trace_log` - `MemoryPeak`). [#29858](https://github.com/ClickHouse/ClickHouse/pull/29858) ([Azat Khuzhin](https://github.com/azat)).
+* PostgreSQL foreign tables: Added the partitioned table prefix 'p' to the query for fetching the replica identity index. [#29828](https://github.com/ClickHouse/ClickHouse/pull/29828) ([Shoh Jahon](https://github.com/Shohjahon)).
+* Apply `max_untracked_memory`/`memory_profiler_step`/`memory_profiler_sample_probability` during mutate/merge to profile memory usage during merges. [#29681](https://github.com/ClickHouse/ClickHouse/pull/29681) ([Azat Khuzhin](https://github.com/azat)).
+* Query obfuscator: `clickhouse-format --obfuscate` now works with more types of queries. [#29672](https://github.com/ClickHouse/ClickHouse/pull/29672) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed the issue: `clickhouse-format --obfuscate` cannot process queries with embedded dictionaries (functions `regionTo...`). [#29667](https://github.com/ClickHouse/ClickHouse/pull/29667) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect Nullable processing of JSON functions. This fixes [#29615](https://github.com/ClickHouse/ClickHouse/issues/29615). Mark as an improvement because https://github.com/ClickHouse/ClickHouse/pull/28012 is not released. [#29659](https://github.com/ClickHouse/ClickHouse/pull/29659) ([Amos Bird](https://github.com/amosbird)).
+* Increase `listen_backlog` by default (to match the default in newer Linux kernels). [#29643](https://github.com/ClickHouse/ClickHouse/pull/29643) ([Azat Khuzhin](https://github.com/azat)).
+* Reload dictionaries, models, and user defined executable functions if the server config `dictionaries_config`, `models_config`, `user_defined_executable_functions_config` changes. Closes [#28142](https://github.com/ClickHouse/ClickHouse/issues/28142). [#29529](https://github.com/ClickHouse/ClickHouse/pull/29529) ([Maksim Kita](https://github.com/kitaisreal)).
+* Get rid of the pointless restriction on projection names. Now projection names can start with `tmp_`. [#29520](https://github.com/ClickHouse/ClickHouse/pull/29520) ([Amos Bird](https://github.com/amosbird)).
+* Fixed the `There is no query or query context has expired` error in mutations with nested subqueries. Do not allow subqueries in a mutation if the table is replicated and the `allow_nondeterministic_mutations` setting is disabled. [#29495](https://github.com/ClickHouse/ClickHouse/pull/29495) ([tavplubix](https://github.com/tavplubix)).
+* Apply config changes to `max_concurrent_queries` during runtime (no need to restart). [#29414](https://github.com/ClickHouse/ClickHouse/pull/29414) ([Raúl Marín](https://github.com/Algunenano)).
+* Added setting `use_skip_indexes`. [#29405](https://github.com/ClickHouse/ClickHouse/pull/29405) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add support for `FREEZE`ing in-memory parts (for backups). [#29376](https://github.com/ClickHouse/ClickHouse/pull/29376) ([Mo Xuan](https://github.com/mo-avatar)).
+* Pass through initial query_id for `clickhouse-benchmark` (previously if you ran a remote query via `clickhouse-benchmark`, queries on shards were not linked to the initial query via `initial_query_id`). [#29364](https://github.com/ClickHouse/ClickHouse/pull/29364) ([Azat Khuzhin](https://github.com/azat)).
+* Skip indexes `tokenbf_v1` and `ngrambf_v1`: added support for the `Array` data type with elements of `String` or `FixedString` type. [#29280](https://github.com/ClickHouse/ClickHouse/pull/29280) ([Maksim Kita](https://github.com/kitaisreal)). Skip indexes `tokenbf_v1` and `ngrambf_v1` added support for the `Map` data type with a key of `String` or `FixedString` type. Author @lingtaolf. [#29220](https://github.com/ClickHouse/ClickHouse/pull/29220) ([Maksim Kita](https://github.com/kitaisreal)).
+* Function `has`: added support for the `Map` data type. [#29267](https://github.com/ClickHouse/ClickHouse/pull/29267) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add the `compress_logs` setting for clickhouse-keeper, which allows compressing clickhouse-keeper logs (for the replicated state machine) with `ZSTD`. Implements: [#26977](https://github.com/ClickHouse/ClickHouse/issues/26977). [#29223](https://github.com/ClickHouse/ClickHouse/pull/29223) ([alesapin](https://github.com/alesapin)).
+* Add a setting `external_table_strict_query` - it will force passing the whole WHERE expression in queries to foreign databases even if it is incompatible. [#29206](https://github.com/ClickHouse/ClickHouse/pull/29206) ([Azat Khuzhin](https://github.com/azat)).
+* Disable projections when `ARRAY JOIN` is used. In previous versions projection analysis could break aliases in array join. [#29139](https://github.com/ClickHouse/ClickHouse/pull/29139) ([Amos Bird](https://github.com/amosbird)).
+* Support more types in `MsgPack` input/output format. [#29077](https://github.com/ClickHouse/ClickHouse/pull/29077) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow to input and output `LowCardinality` columns in `ORC` input/output format. [#29062](https://github.com/ClickHouse/ClickHouse/pull/29062) ([Kruglov Pavel](https://github.com/Avogar)).
+* Select from `system.distributed_ddl_queue` might show incorrect values, it's fixed. [#29061](https://github.com/ClickHouse/ClickHouse/pull/29061) ([tavplubix](https://github.com/tavplubix)).
+* Correct behaviour with unknown methods for HTTP connection. Solves [#29050](https://github.com/ClickHouse/ClickHouse/issues/29050). [#29057](https://github.com/ClickHouse/ClickHouse/pull/29057) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* `clickhouse-keeper`: Fix bug in `clickhouse-keeper-converter` which can lead to some data loss while restoring from ZooKeeper logs (not snapshot). [#29030](https://github.com/ClickHouse/ClickHouse/pull/29030) ([小路](https://github.com/nicelulu)). Fix bug in `clickhouse-keeper-converter` which can lead to incorrect ZooKeeper log deserialization. [#29071](https://github.com/ClickHouse/ClickHouse/pull/29071) ([小路](https://github.com/nicelulu)).
+* Apply settings from `CREATE ... AS SELECT` queries (fixes: [#28810](https://github.com/ClickHouse/ClickHouse/issues/28810)). [#28962](https://github.com/ClickHouse/ClickHouse/pull/28962) ([Azat Khuzhin](https://github.com/azat)).
+* Respect the default database setting for ALTER TABLE ... ON CLUSTER ... REPLACE/MOVE PARTITION FROM/TO ... [#28955](https://github.com/ClickHouse/ClickHouse/pull/28955) ([anneji-dev](https://github.com/anneji-dev)).
+* gRPC protocol: Allow changing server-side compression from the client. [#28953](https://github.com/ClickHouse/ClickHouse/pull/28953) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Skip "no data" exceptions when reading thermal sensors for asynchronous metrics. This closes [#28852](https://github.com/ClickHouse/ClickHouse/issues/28852). [#28882](https://github.com/ClickHouse/ClickHouse/pull/28882) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed a logical race condition that might cause a `Dictionary not found` error for an existing dictionary in rare cases. [#28853](https://github.com/ClickHouse/ClickHouse/pull/28853) ([tavplubix](https://github.com/tavplubix)).
+* Relax the nested function check for the If-combinator (but forbid nested identical combinators). [#28828](https://github.com/ClickHouse/ClickHouse/pull/28828) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible uncaught exception during server termination. [#28761](https://github.com/ClickHouse/ClickHouse/pull/28761) ([Azat Khuzhin](https://github.com/azat)).
+* Forbid cleaning of tmp directories that can be used by an active mutation/merge if the mutation/merge is extraordinarily long. [#28760](https://github.com/ClickHouse/ClickHouse/pull/28760) ([Azat Khuzhin](https://github.com/azat)).
+* Allow the optimization `optimize_arithmetic_operations_in_aggregate_functions = 1` when an alias is used. [#28746](https://github.com/ClickHouse/ClickHouse/pull/28746) ([Amos Bird](https://github.com/amosbird)).
+* Implement the `detach_not_byte_identical_parts` setting for `ReplicatedMergeTree`, which will detach instead of removing non-byte-identical parts (after merge/mutate). [#28708](https://github.com/ClickHouse/ClickHouse/pull/28708) ([Azat Khuzhin](https://github.com/azat)).
+* Implement the `max_suspicious_broken_parts_bytes` setting for `MergeTree` (to limit the total size of all broken parts, default is `1GiB`). [#28707](https://github.com/ClickHouse/ClickHouse/pull/28707) ([Azat Khuzhin](https://github.com/azat)).
+* Enable expanding macros in `RabbitMQ` table settings. [#28683](https://github.com/ClickHouse/ClickHouse/pull/28683) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Restore the possibility to read data of a table using the `Log` engine in multiple threads. [#28125](https://github.com/ClickHouse/ClickHouse/pull/28125) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix misbehavior of NULL column handling in JSON functions. This fixes [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930). [#28012](https://github.com/ClickHouse/ClickHouse/pull/28012) ([Amos Bird](https://github.com/amosbird)).
+* Allow setting the size of the Mark/Uncompressed cache for skip indices separately from columns. [#27961](https://github.com/ClickHouse/ClickHouse/pull/27961) ([Amos Bird](https://github.com/amosbird)).
+* Allow mixing JOIN with `USING` with other JOIN types. [#23881](https://github.com/ClickHouse/ClickHouse/pull/23881) ([darkkeks](https://github.com/darkkeks)).
+* Update aws-sdk submodule for throttling in Yandex Cloud S3. [#30646](https://github.com/ClickHouse/ClickHouse/pull/30646) ([ianton-ru](https://github.com/ianton-ru)).
+* Fix releasing query ID and session ID at the end of query processing while handling a gRPC call. [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix shutdown of `AccessControlManager` to fix a flaky test. [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix failed assertion in reading from `HDFS`. Update the libhdfs3 library to be able to run in tests in debug. Closes [#29251](https://github.com/ClickHouse/ClickHouse/issues/29251). Closes [#27814](https://github.com/ClickHouse/ClickHouse/issues/27814). [#29276](https://github.com/ClickHouse/ClickHouse/pull/29276) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+
+#### Build/Testing/Packaging Improvement
+
+* Add support for FreeBSD builds for AArch64 machines. [#29952](https://github.com/ClickHouse/ClickHouse/pull/29952) ([MikaelUrankar](https://github.com/MikaelUrankar)).
+* Recursive submodules are no longer needed for ClickHouse. [#30315](https://github.com/ClickHouse/ClickHouse/pull/30315) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* ClickHouse can be statically built with Musl. This is added as an experiment; it does not support building `odbc-bridge`, `library-bridge`, integration with CatBoost and some libraries. [#30248](https://github.com/ClickHouse/ClickHouse/pull/30248) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Enable `Protobuf`, `Arrow`, `ORC`, `Parquet` for `AArch64` and `Darwin` (macOS) builds. This closes [#29248](https://github.com/ClickHouse/ClickHouse/issues/29248). This closes [#28018](https://github.com/ClickHouse/ClickHouse/issues/28018). [#30015](https://github.com/ClickHouse/ClickHouse/pull/30015) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add cross-build for PowerPC (powerpc64le). This closes [#9589](https://github.com/ClickHouse/ClickHouse/issues/9589). Enable support for interaction with MySQL for AArch64 and PowerPC. This closes [#26301](https://github.com/ClickHouse/ClickHouse/issues/26301). [#30010](https://github.com/ClickHouse/ClickHouse/pull/30010) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Leave only required files in cross-compile toolchains. Include them as submodules (earlier they were downloaded as tarballs). [#29974](https://github.com/ClickHouse/ClickHouse/pull/29974) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Implemented a structure-aware fuzzing approach in ClickHouse for the SELECT statement parser. [#30012](https://github.com/ClickHouse/ClickHouse/pull/30012) ([Paul](https://github.com/PaulCher)).
+* Turn on the experimental constexpr expression evaluator for clang to speed up template code compilation. [#29668](https://github.com/ClickHouse/ClickHouse/pull/29668) ([myrrc](https://github.com/myrrc)).
+* Add ability to compile using a newer version of glibc without using new symbols. [#29594](https://github.com/ClickHouse/ClickHouse/pull/29594) ([Azat Khuzhin](https://github.com/azat)).
+* Reduce Debug build binary size with a clang optimization option. [#28736](https://github.com/ClickHouse/ClickHouse/pull/28736) ([flynn](https://github.com/ucasfl)).
+* Now all images for CI will be placed in a separate Docker Hub repo. [#28656](https://github.com/ClickHouse/ClickHouse/pull/28656) ([alesapin](https://github.com/alesapin)).
+* Improve support for building with clang-13. [#28046](https://github.com/ClickHouse/ClickHouse/pull/28046) ([Sergei Semin](https://github.com/syominsergey)).
+* Add ability to print raw profile events to `clickhouse-client` (this can be useful for debugging and for testing). [#30064](https://github.com/ClickHouse/ClickHouse/pull/30064) ([Azat Khuzhin](https://github.com/azat)).
+* Add time dependency for the clickhouse-server unit (systemd and sysvinit init). [#28891](https://github.com/ClickHouse/ClickHouse/pull/28891) ([Azat Khuzhin](https://github.com/azat)).
+* Reload the stacktrace cache when a symbol is reloaded. [#28137](https://github.com/ClickHouse/ClickHouse/pull/28137) ([Amos Bird](https://github.com/amosbird)).
+
+#### Bug Fix
+
+* Functions for case-insensitive search in UTF-8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually do not match in very rare cases; it's fixed. [#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([tavplubix](https://github.com/tavplubix)).
+* Fix reading from an empty file on an encrypted disk. [#30494](https://github.com/ClickHouse/ClickHouse/pull/30494) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix transformation of disjunction chains to `IN` (controlled by the setting `optimize_min_equality_disjunction_chain_length`) in distributed queries with settings `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)).
+* Allow using a materialized column as the sharding key in a distributed table even if `insert_allow_materialized_columns=0`. [#28637](https://github.com/ClickHouse/ClickHouse/pull/28637) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix `ORDER BY ... WITH FILL` with `TO` and `FROM` set and no rows in the result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix set index not used in AND/OR expressions when there are more than two operands. This fixes [#30416](https://github.com/ClickHouse/ClickHouse/issues/30416). [#30887](https://github.com/ClickHouse/ClickHouse/pull/30887) ([Amos Bird](https://github.com/amosbird)).
+* Fix crash when a projection with a hashing function is materialized. This fixes [#30861](https://github.com/ClickHouse/ClickHouse/issues/30861). The issue is similar to https://github.com/ClickHouse/ClickHouse/pull/28560 which is a lack of proper understanding of the invariant of the header's emptiness. [#30877](https://github.com/ClickHouse/ClickHouse/pull/30877) ([Amos Bird](https://github.com/amosbird)).
+* Fixed ambiguity when extracting an auxiliary ZooKeeper name from a ZooKeeper path in `ReplicatedMergeTree`. Previously the server might fail to start with `Unknown auxiliary ZooKeeper name` if the ZooKeeper path contained a colon. Fixes [#29052](https://github.com/ClickHouse/ClickHouse/issues/29052). Also it was allowed to specify a ZooKeeper path that does not start with a slash, but now it's deprecated and creation of new tables with such a path is not allowed. Slashes and colons in auxiliary ZooKeeper names are not allowed too. [#30822](https://github.com/ClickHouse/ClickHouse/pull/30822) ([tavplubix](https://github.com/tavplubix)).
+* Clean the temporary directory when localBackup fails for some reason. [#30797](https://github.com/ClickHouse/ClickHouse/pull/30797) ([ianton-ru](https://github.com/ianton-ru)).
+* Fixed a race condition between `REPLACE/MOVE PARTITION` and background merge in non-replicated `MergeTree` that might cause a part of moved/replaced data to remain in the partition. Fixes [#29327](https://github.com/ClickHouse/ClickHouse/issues/29327). [#30717](https://github.com/ClickHouse/ClickHouse/pull/30717) ([tavplubix](https://github.com/tavplubix)).
+* Fix PREWHERE with WHERE in case of an always-true PREWHERE. [#30668](https://github.com/ClickHouse/ClickHouse/pull/30668) ([Azat Khuzhin](https://github.com/azat)).
+* Limit push down optimization could cause an error `Cannot find column`. Fixes [#30438](https://github.com/ClickHouse/ClickHouse/issues/30438). [#30562](https://github.com/ClickHouse/ClickHouse/pull/30562) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add missing parentheses for `isNotNull`/`isNull` rewrites to `IS [NOT] NULL` (fixes queries that have something like `isNotNull(1)+isNotNull(2)`). [#30520](https://github.com/ClickHouse/ClickHouse/pull/30520) ([Azat Khuzhin](https://github.com/azat)).
+* Fix deadlock on ALTER with a scalar subquery to the same table, close [#30461](https://github.com/ClickHouse/ClickHouse/issues/30461). [#30492](https://github.com/ClickHouse/ClickHouse/pull/30492) ([Vladimir C](https://github.com/vdimir)).
+* Fixed a segfault which might happen if the session expired during execution of REPLACE PARTITION. [#30432](https://github.com/ClickHouse/ClickHouse/pull/30432) ([tavplubix](https://github.com/tavplubix)).
+* Queries with a condition like `IN (subquery)` could return an incorrect result if an aggregate projection was applied. Fixed creation of sets for projections. [#30310](https://github.com/ClickHouse/ClickHouse/pull/30310) ([Amos Bird](https://github.com/amosbird)).
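+
+For the `WITH FILL` fix above, the shape of the affected query class (illustrative; an empty source plus explicit `FROM`/`TO` bounds):
+
+```sql
+SELECT number AS n
+FROM numbers(0)                     -- empty result set
+ORDER BY n WITH FILL FROM 1 TO 5;   -- bounds still produce filled rows
+```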
+* Fix column alias resolution of JOIN queries when projection is enabled. This fixes [#30146](https://github.com/ClickHouse/ClickHouse/issues/30146). [#30293](https://github.com/ClickHouse/ClickHouse/pull/30293) ([Amos Bird](https://github.com/amosbird)).
+* Fix a deficiency in the `replaceRegexpAll` function. [#30292](https://github.com/ClickHouse/ClickHouse/pull/30292) ([Memo](https://github.com/Joeywzr)).
+* Fix ComplexKeyHashedDictionary, ComplexKeySparseHashedDictionary parsing the `preallocate` option from layout config. [#30246](https://github.com/ClickHouse/ClickHouse/pull/30246) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix `[I]LIKE` function. Closes [#28661](https://github.com/ClickHouse/ClickHouse/issues/28661). [#30244](https://github.com/ClickHouse/ClickHouse/pull/30244) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix crash with short-circuit evaluation and `LowCardinality` in `multiIf`. [#30243](https://github.com/ClickHouse/ClickHouse/pull/30243) ([Raúl Marín](https://github.com/Algunenano)).
+* FlatDictionary, HashedDictionary: fix `bytes_allocated` calculation for nullable attributes. [#30238](https://github.com/ClickHouse/ClickHouse/pull/30238) ([Maksim Kita](https://github.com/kitaisreal)).
+* Allow identifiers starting with numbers in multiple joins. [#30230](https://github.com/ClickHouse/ClickHouse/pull/30230) ([Vladimir C](https://github.com/vdimir)).
+* Fix reading from `MergeTree` with `max_read_buffer_size = 0` (when the user wants to shoot himself in the foot) (can lead to exceptions `Can't adjust last granule`, `LOGICAL_ERROR`, or even data loss). [#30192](https://github.com/ClickHouse/ClickHouse/pull/30192) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `pread_fake_async`/`pread_threadpool` with `min_bytes_to_use_direct_io`. [#30191](https://github.com/ClickHouse/ClickHouse/pull/30191) ([Azat Khuzhin](https://github.com/azat)).
+* Fix INSERT SELECT incorrectly filling a MATERIALIZED column based on a Nullable column. [#30189](https://github.com/ClickHouse/ClickHouse/pull/30189) ([Azat Khuzhin](https://github.com/azat)).
+* Support nullable arguments in function `initializeAggregation`. [#30177](https://github.com/ClickHouse/ClickHouse/pull/30177) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix error `Port is already connected` for queries with `GLOBAL IN` and `WITH TOTALS`. Only for 21.9 and 21.10. [#30086](https://github.com/ClickHouse/ClickHouse/pull/30086) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix race between MOVE PARTITION and merges/mutations for MergeTree. [#30074](https://github.com/ClickHouse/ClickHouse/pull/30074) ([Azat Khuzhin](https://github.com/azat)).
+* A dropped `Memory` database might reappear after server restart; it's fixed ([#29795](https://github.com/ClickHouse/ClickHouse/issues/29795)). Also added the `force_remove_data_recursively_on_drop` setting as a workaround for the `Directory not empty` error when dropping an `Ordinary` database (because it's not possible to remove data leftovers manually in a cloud environment). [#30054](https://github.com/ClickHouse/ClickHouse/pull/30054) ([tavplubix](https://github.com/tavplubix)).
+* Fix crash of sample by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)).
+* Try to close issue [#29965](https://github.com/ClickHouse/ClickHouse/issues/29965). [#29976](https://github.com/ClickHouse/ClickHouse/pull/29976) ([hexiaoting](https://github.com/hexiaoting)).
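+
+A small illustration of `initializeAggregation` with a Nullable argument, per the entry above (values chosen for the example):
+
+```sql
+-- Builds a uniq state from a single Nullable value, then finalizes it.
+SELECT finalizeAggregation(initializeAggregation('uniqState', toNullable(1))) AS u;  -- 1
+```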
+* Fix possible data-race between `FileChecker` and `StorageLog`/`StorageStripeLog`. [#29959](https://github.com/ClickHouse/ClickHouse/pull/29959) ([Azat Khuzhin](https://github.com/azat)).
+* Fix data-race between `LogSink::writeMarks()` and `LogSource` in `StorageLog`. [#29946](https://github.com/ClickHouse/ClickHouse/pull/29946) ([Azat Khuzhin](https://github.com/azat)).
+* Fix potential resource leak of the concurrent query limit of merge tree tables introduced in https://github.com/ClickHouse/ClickHouse/pull/19544. [#29879](https://github.com/ClickHouse/ClickHouse/pull/29879) ([Amos Bird](https://github.com/amosbird)).
+* Fix system tables recreation check (fails to detect changes in enum values). [#29857](https://github.com/ClickHouse/ClickHouse/pull/29857) ([Azat Khuzhin](https://github.com/azat)).
+* MaterializedMySQL: Fix an issue where if the connection to MySQL was lost, only parts of a transaction could be processed. [#29837](https://github.com/ClickHouse/ClickHouse/pull/29837) ([Håvard Kvålen](https://github.com/havardk)).
+* Avoid `Timeout exceeded: elapsed 18446744073.709553 seconds` error that might happen in extremely rare cases, presumably due to some bug in the kernel. Fixes [#29154](https://github.com/ClickHouse/ClickHouse/issues/29154). [#29811](https://github.com/ClickHouse/ClickHouse/pull/29811) ([tavplubix](https://github.com/tavplubix)).
+* Fix bad cast in `ATTACH TABLE ... FROM 'path'` query when a non-string literal is used instead of a path. It may lead to reading of uninitialized memory. [#29790](https://github.com/ClickHouse/ClickHouse/pull/29790) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix concurrent access to `LowCardinality` during `GROUP BY` (in combination with `Buffer` tables it may lead to troubles). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)).
+* Fix incorrect `GROUP BY` (multiple rows with the same keys in result) in case of a distributed query when shards had mixed versions `<= 21.3` and `>= 21.4`, the `GROUP BY` key had several columns all with fixed size, and two-level aggregation was activated (see `group_by_two_level_threshold` and `group_by_two_level_threshold_bytes`). Fixes [#29580](https://github.com/ClickHouse/ClickHouse/issues/29580). [#29735](https://github.com/ClickHouse/ClickHouse/pull/29735) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed incorrect behaviour of the setting `materialized_postgresql_tables_list` at server restart. Found in [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#29686](https://github.com/ClickHouse/ClickHouse/pull/29686) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* A condition in the filter predicate could be lost after push-down optimisation. [#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix JIT expression compilation with aliases and short-circuit expression evaluation. Closes [#29403](https://github.com/ClickHouse/ClickHouse/issues/29403). [#29574](https://github.com/ClickHouse/ClickHouse/pull/29574) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix rare segfault in `ALTER MODIFY` query when using an incorrect table identifier in a `DEFAULT` expression like `x.y.z...` Fixes [#29184](https://github.com/ClickHouse/ClickHouse/issues/29184). [#29573](https://github.com/ClickHouse/ClickHouse/pull/29573) ([alesapin](https://github.com/alesapin)).
+* Fix nullptr dereference for `GROUP BY WITH TOTALS HAVING` (when the column from `HAVING` wasn't selected). [#29553](https://github.com/ClickHouse/ClickHouse/pull/29553) ([Azat Khuzhin](https://github.com/azat)).
+* Avoid deadlocks when reading and writing on `Join` table engine tables at the same time. [#29544](https://github.com/ClickHouse/ClickHouse/pull/29544) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix bug in check `pathStartsWith` because there was a bug with the usage of `std::mismatch`: `The behavior is undefined if the second range is shorter than the first range.` [#29531](https://github.com/ClickHouse/ClickHouse/pull/29531) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* In the ODBC bridge, add retries for the error `Invalid cursor state`. It is a retriable error. Closes [#29473](https://github.com/ClickHouse/ClickHouse/issues/29473). [#29518](https://github.com/ClickHouse/ClickHouse/pull/29518) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed incorrect table name parsing on loading of a `Lazy` database. Fixes [#29456](https://github.com/ClickHouse/ClickHouse/issues/29456). [#29476](https://github.com/ClickHouse/ClickHouse/pull/29476) ([tavplubix](https://github.com/tavplubix)).
+* Fix possible `Block structure mismatch` for subqueries with a pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix logical error `Cannot capture columns` in functions greatest/least. Closes [#29334](https://github.com/ClickHouse/ClickHouse/issues/29334). [#29454](https://github.com/ClickHouse/ClickHouse/pull/29454) ([Kruglov Pavel](https://github.com/Avogar)).
+* RocksDB table engine: fix race condition during multiple DB opening (and get back some tests that trigger the problem on CI). [#29393](https://github.com/ClickHouse/ClickHouse/pull/29393) ([Azat Khuzhin](https://github.com/azat)).
+* Fix replicated access storage not shutting down cleanly when misconfigured. [#29388](https://github.com/ClickHouse/ClickHouse/pull/29388) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Remove window function `nth_value` as it is not memory-safe. This closes [#29347](https://github.com/ClickHouse/ClickHouse/issues/29347). [#29348](https://github.com/ClickHouse/ClickHouse/pull/29348) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix vertical merges of projection parts. This fixes [#29253](https://github.com/ClickHouse/ClickHouse/issues/29253). This PR also fixes several projection merge/mutation issues introduced in https://github.com/ClickHouse/ClickHouse/pull/25165. [#29337](https://github.com/ClickHouse/ClickHouse/pull/29337) ([Amos Bird](https://github.com/amosbird)).
+* Fix hanging DDL queries on a Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Fix connection timeouts (`send_timeout`/`receive_timeout`). [#29282](https://github.com/ClickHouse/ClickHouse/pull/29282) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible `Table columns structure in ZooKeeper is different from local table structure` exception while recreating or creating new replicas of `ReplicatedMergeTree`, when one of the table columns has default expressions with case-insensitive functions. [#29266](https://github.com/ClickHouse/ClickHouse/pull/29266) ([Anton Popov](https://github.com/CurtizJ)).
+* Send normal `Database doesn't exist` error (`UNKNOWN_DATABASE`) to the client (via TCP) instead of `Attempt to read after eof` (`ATTEMPT_TO_READ_AFTER_EOF`). [#29229](https://github.com/ClickHouse/ClickHouse/pull/29229) ([Azat Khuzhin](https://github.com/azat)).
+* Fix segfault while inserting into a column with type `LowCardinality(Nullable)` in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Do not allow reusing previous credentials in case of inter-server secret (before, an `INSERT` via Buffer/Kafka to a Distributed table with an interserver secret configured for that cluster could re-use the previously set user for that connection). [#29060](https://github.com/ClickHouse/ClickHouse/pull/29060) ([Azat Khuzhin](https://github.com/azat)).
+* Handle `any_join_distinct_right_table_keys` when joining with a dictionary, close [#29007](https://github.com/ClickHouse/ClickHouse/issues/29007). [#29014](https://github.com/ClickHouse/ClickHouse/pull/29014) ([Vladimir C](https://github.com/vdimir)).
+* Fix "Not found column ... in block" error when joining on an alias column, close [#26980](https://github.com/ClickHouse/ClickHouse/issues/26980). [#29008](https://github.com/ClickHouse/ClickHouse/pull/29008) ([Vladimir C](https://github.com/vdimir)).
+* Fix the number of threads used in `GLOBAL IN` subquery (it was executed in a single thread since the [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bad optimizations of `ORDER BY` if it contains `WITH FILL`. This closes [#28908](https://github.com/ClickHouse/ClickHouse/issues/28908). This closes [#26049](https://github.com/ClickHouse/ClickHouse/issues/26049). [#28910](https://github.com/ClickHouse/ClickHouse/pull/28910) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with constants. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)).
+* Fix waiting for mutation with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)).
+* Fix queries to external databases (e.g. MySQL) with multiple columns in `IN` (e.g. `(k, v) IN ((1, 2))`). [#28888](https://github.com/ClickHouse/ClickHouse/pull/28888) ([Azat Khuzhin](https://github.com/azat)).
+* Fix bug with `LowCardinality` in short-circuit function evaluation. Closes [#28884](https://github.com/ClickHouse/ClickHouse/issues/28884). [#28887](https://github.com/ClickHouse/ClickHouse/pull/28887) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix reading of subcolumns from compact parts. [#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed a race condition between `DROP PART` and `REPLACE/MOVE PARTITION` that might cause replicas to diverge in rare cases. [#28864](https://github.com/ClickHouse/ClickHouse/pull/28864) ([tavplubix](https://github.com/tavplubix)).
+* Fix expression compilation with short-circuit evaluation. [#28821](https://github.com/ClickHouse/ClickHouse/pull/28821) ([Azat Khuzhin](https://github.com/azat)).
+* Fix extremely rare case when ReplicatedMergeTree replicas can diverge after hard reboot of all replicas. The error looks like `Part ... intersects (previous|next) part ...`.
[#28817](https://github.com/ClickHouse/ClickHouse/pull/28817) ([alesapin](https://github.com/alesapin)).
+* Better check for connection usability and also catch any exception in `RabbitMQ` shutdown just in case. [#28797](https://github.com/ClickHouse/ClickHouse/pull/28797) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix benign race condition in ReplicatedMergeTreeQueue. It shouldn't be visible to users, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)).
+* Fix possible crash for `SELECT` with partially created aggregate projection in case of exception. [#28700](https://github.com/ClickHouse/ClickHouse/pull/28700) ([Amos Bird](https://github.com/amosbird)).
+* Fix the coredump in the creation of distributed tables when the parameters passed in are wrong. [#28686](https://github.com/ClickHouse/ClickHouse/pull/28686) ([Zhiyong Wang](https://github.com/ljcui)).
+* Add `Settings.Names`, `Settings.Values` aliases for the `system.processes` table. [#28685](https://github.com/ClickHouse/ClickHouse/pull/28685) ([Vitaly](https://github.com/orloffv)).
+* Support for S2 Geometry library: Fix the number of arguments required by `s2RectAdd` and `s2RectContains` functions. [#28663](https://github.com/ClickHouse/ClickHouse/pull/28663) ([Bharat Nallan](https://github.com/bharatnc)).
+* Fix invalid constant type conversion when a `Nullable` or `LowCardinality` primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)).
+* Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)).
+
+
+### ClickHouse release v21.10, 2021-10-16
+
+#### Backward Incompatible Change
+
+* Now the following MergeTree table-level settings: `replicated_max_parallel_sends`, `replicated_max_parallel_sends_for_table`, `replicated_max_parallel_fetches`, `replicated_max_parallel_fetches_for_table` do nothing. They never worked well and were replaced with `max_replicated_fetches_network_bandwidth`, `max_replicated_sends_network_bandwidth` and `background_fetches_pool_size`. [#28404](https://github.com/ClickHouse/ClickHouse/pull/28404) ([alesapin](https://github.com/alesapin)).
+
+#### New Feature
+
+* Add feature for creating user-defined functions (UDF) as lambda expressions. Syntax `CREATE FUNCTION {function_name} as ({parameters}) -> {function core}`. Example `CREATE FUNCTION plus_one as (a) -> a + 1`. Authors @Realist007. [#27796](https://github.com/ClickHouse/ClickHouse/pull/27796) ([Maksim Kita](https://github.com/kitaisreal)) [#23978](https://github.com/ClickHouse/ClickHouse/pull/23978) ([Realist007](https://github.com/Realist007)).
+* Added `Executable` storage engine and `executable` table function. It enables data processing with external scripts in a streaming fashion. [#28102](https://github.com/ClickHouse/ClickHouse/pull/28102) ([Maksim Kita](https://github.com/kitaisreal)) ([ruct](https://github.com/ruct)).
+* Added `ExecutablePool` storage engine. Similar to `Executable`, but it uses a pool of long-running processes. [#28518](https://github.com/ClickHouse/ClickHouse/pull/28518) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `ALTER TABLE ... MATERIALIZE COLUMN` query.
[#27038](https://github.com/ClickHouse/ClickHouse/pull/27038) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Support for partitioned write into the `s3` table function. [#23051](https://github.com/ClickHouse/ClickHouse/pull/23051) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Support `lz4` compression format (in addition to `gz`, `bz2`, `xz`, `zstd`) for data import / export. [#25310](https://github.com/ClickHouse/ClickHouse/pull/25310) ([Bharat Nallan](https://github.com/bharatnc)).
+* Allow positional arguments under setting `enable_positional_arguments`. Closes [#2592](https://github.com/ClickHouse/ClickHouse/issues/2592). [#27530](https://github.com/ClickHouse/ClickHouse/pull/27530) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Accept user settings related to file formats in the `SETTINGS` clause of a `CREATE` query for s3 tables. This closes [#27580](https://github.com/ClickHouse/ClickHouse/issues/27580). [#28037](https://github.com/ClickHouse/ClickHouse/pull/28037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Allow SSL connections for the `RabbitMQ` engine. [#28365](https://github.com/ClickHouse/ClickHouse/pull/28365) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add `getServerPort` function to allow getting the server port. When the port is not used by the server, an exception is thrown. [#27900](https://github.com/ClickHouse/ClickHouse/pull/27900) ([Amos Bird](https://github.com/amosbird)).
+* Add conversion functions between "snowflake id" and `DateTime`, `DateTime64`. See [#27058](https://github.com/ClickHouse/ClickHouse/issues/27058). [#27704](https://github.com/ClickHouse/ClickHouse/pull/27704) ([jasine](https://github.com/jasine)).
+* Add function `SHA512`. [#27830](https://github.com/ClickHouse/ClickHouse/pull/27830) ([zhanglistar](https://github.com/zhanglistar)).
+* Add `log_queries_probability` setting that allows the user to write only a sample of queries to `query_log`. Closes [#16609](https://github.com/ClickHouse/ClickHouse/issues/16609). [#27527](https://github.com/ClickHouse/ClickHouse/pull/27527) ([Nikolay Degterinsky](https://github.com/evillique)).
+
+#### Experimental Feature
+
+* Add a `web` type of disks to store read-only tables on a web server in the form of static files. See [#23982](https://github.com/ClickHouse/ClickHouse/issues/23982). [#25251](https://github.com/ClickHouse/ClickHouse/pull/25251) ([Kseniia Sumarokova](https://github.com/kssenii)). This is mostly needed to facilitate testing of operation on shared storage and for easy importing of datasets. Not recommended to use before release 21.11.
+* Added new commands `BACKUP` and `RESTORE`. [#21945](https://github.com/ClickHouse/ClickHouse/pull/21945) ([Vitaly Baranov](https://github.com/vitlibar)). This is under development and not intended to be used in the current version.
+
+#### Performance Improvement
+
+* Speed up `sumIf` and `countIf` aggregation functions. [#28272](https://github.com/ClickHouse/ClickHouse/pull/28272) ([Raúl Marín](https://github.com/Algunenano)).
+* Create virtual projection for `minmax` indices. Now, when `allow_experimental_projection_optimization` is enabled, queries will use the minmax index instead of reading the data when possible. [#26286](https://github.com/ClickHouse/ClickHouse/pull/26286) ([Amos Bird](https://github.com/amosbird)).
+* Introduce two checks in `sequenceMatch` and `sequenceCount` that allow for early exit when some deterministic part of the sequence pattern is missing from the events list.
This change unlocks many queries that would previously fail due to reaching the operations cap, and generally speeds up the pipeline. [#27729](https://github.com/ClickHouse/ClickHouse/pull/27729) ([Jakub Kuklis](https://github.com/jkuklis)).
+* Enhance primary key analysis with always-monotonic information of binary functions, notably non-zero constant division. [#28302](https://github.com/ClickHouse/ClickHouse/pull/28302) ([Amos Bird](https://github.com/amosbird)).
+* Make the `hasAll` filter condition leverage bloom filter data-skipping indexes. [#27984](https://github.com/ClickHouse/ClickHouse/pull/27984) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
+* Speed up data parts loading by delaying the table startup process. [#28313](https://github.com/ClickHouse/ClickHouse/pull/28313) ([Amos Bird](https://github.com/amosbird)).
+* Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by the setting `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)).
+* Enable `optimize_distributed_group_by_sharding_key` by default. [#28105](https://github.com/ClickHouse/ClickHouse/pull/28105) ([Azat Khuzhin](https://github.com/azat)).
+
+#### Improvement
+
+* Check the cluster name before creating a `Distributed` table; do not allow creating a table with an incorrect cluster name. Fixes [#27832](https://github.com/ClickHouse/ClickHouse/issues/27832). [#27927](https://github.com/ClickHouse/ClickHouse/pull/27927) ([tavplubix](https://github.com/tavplubix)).
+* Add aggregate function `quantileBFloat16Weighted`, similar to other `quantile...Weighted` functions. This closes [#27745](https://github.com/ClickHouse/ClickHouse/issues/27745). [#27758](https://github.com/ClickHouse/ClickHouse/pull/27758) ([Ivan Novitskiy](https://github.com/RedClusive)).
+* Allow creating dictionaries with an empty attributes list. [#27905](https://github.com/ClickHouse/ClickHouse/pull/27905) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add interactive documentation in `clickhouse-client` about how to reset the password. This is useful in the scenario when a user has installed ClickHouse, set up the password, and instantly forgotten it. See [#27750](https://github.com/ClickHouse/ClickHouse/issues/27750). [#27903](https://github.com/ClickHouse/ClickHouse/pull/27903) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support the case when the data is enclosed in an array in the `JSONAsString` input format. Closes [#25517](https://github.com/ClickHouse/ClickHouse/issues/25517). [#25633](https://github.com/ClickHouse/ClickHouse/pull/25633) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add new column `last_queue_update_exception` to the `system.replicas` table. [#26843](https://github.com/ClickHouse/ClickHouse/pull/26843) ([nvartolomei](https://github.com/nvartolomei)).
+* Support reconnections on failover for `MaterializedPostgreSQL` tables. Closes [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#28614](https://github.com/ClickHouse/ClickHouse/pull/28614) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Generate a unique server UUID on first server start. [#20089](https://github.com/ClickHouse/ClickHouse/pull/20089) ([Bharat Nallan](https://github.com/bharatnc)).
+* Introduce a `connection_wait_timeout` setting (defaults to 5 seconds; 0 means do not wait) for the `MySQL` engine. [#28474](https://github.com/ClickHouse/ClickHouse/pull/28474) ([Azat Khuzhin](https://github.com/azat)).
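+
+  A minimal sketch of using this setting with the `MySQL` table engine; the host, database, table, and credentials below are placeholders:
+
+  ```sql
+  CREATE TABLE mysql_orders
+  (
+      id UInt64,
+      status String
+  )
+  ENGINE = MySQL('mysql-host:3306', 'shop', 'orders', 'user', 'password')
+  SETTINGS connection_wait_timeout = 10; -- wait up to 10 seconds for a free connection; 0 would disable waiting
+  ```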
+* Do not allow creating `MaterializedPostgreSQL` with bad arguments. Closes [#28423](https://github.com/ClickHouse/ClickHouse/issues/28423). [#28430](https://github.com/ClickHouse/ClickHouse/pull/28430) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Use a real tmp file instead of the predefined "rows_sources" for vertical merges. This avoids generating garbage directories in tmp disks. [#28299](https://github.com/ClickHouse/ClickHouse/pull/28299) ([Amos Bird](https://github.com/amosbird)).
+* Added `libhdfs3_conf` in the server config instead of exporting the env variable `LIBHDFS3_CONF` in clickhouse-server.service. This is for configuring interaction with HDFS. [#28268](https://github.com/ClickHouse/ClickHouse/pull/28268) ([Zhichang Yu](https://github.com/yuzhichang)).
+* Fix removing of parts in a Temporary state which can lead to an unexpected exception (`Part %name% doesn't exist`). Fixes [#23661](https://github.com/ClickHouse/ClickHouse/issues/23661). [#28221](https://github.com/ClickHouse/ClickHouse/pull/28221) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `zookeeper_log.address` (before the first patch in this PR the address was always `::`) and reduce the number of `getpeername(2)` calls for this column (previously, `getpeername()` was called each time an entry was added to `zookeeper_log`; the address is now cached in the ZooKeeper client to avoid this). [#28212](https://github.com/ClickHouse/ClickHouse/pull/28212) ([Azat Khuzhin](https://github.com/azat)).
+* Support implicit conversions between the index in operator `[]` and the key of type `Map` (e.g. different `Int` types, `String` and `FixedString`). [#28096](https://github.com/ClickHouse/ClickHouse/pull/28096) ([Anton Popov](https://github.com/CurtizJ)).
+* Support `ON CONFLICT` clause when inserting into the PostgreSQL table engine or table function. Closes [#27727](https://github.com/ClickHouse/ClickHouse/issues/27727). [#28081](https://github.com/ClickHouse/ClickHouse/pull/28081) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Lower restrictions for the `Enum` data type to allow attaching compatible data. Closes [#26672](https://github.com/ClickHouse/ClickHouse/issues/26672). [#28028](https://github.com/ClickHouse/ClickHouse/pull/28028) ([Dmitry Novik](https://github.com/novikd)).
+* Add a setting `empty_result_for_aggregation_by_constant_keys_on_empty_set` to control the behavior of grouping by constant keys on an empty set. This is to bring back the old behaviour of [#6842](https://github.com/ClickHouse/ClickHouse/issues/6842). [#27932](https://github.com/ClickHouse/ClickHouse/pull/27932) ([Amos Bird](https://github.com/amosbird)).
+* Added `replication_wait_for_inactive_replica_timeout` setting. It allows specifying how long to wait for inactive replicas to execute an `ALTER`/`OPTIMIZE`/`TRUNCATE` query (default is 120 seconds). If `replication_alter_partitions_sync` is 2 and some replicas are not active for more than `replication_wait_for_inactive_replica_timeout` seconds, then `UNFINISHED` will be thrown. [#27931](https://github.com/ClickHouse/ClickHouse/pull/27931) ([tavplubix](https://github.com/tavplubix)).
+* Support lambda argument for the `APPLY` column transformer, which allows applying functions with more than one argument. This is for [#27877](https://github.com/ClickHouse/ClickHouse/issues/27877). [#27901](https://github.com/ClickHouse/ClickHouse/pull/27901) ([Amos Bird](https://github.com/amosbird)).
+* Enable `tcp_keep_alive_timeout` by default.
[#27882](https://github.com/ClickHouse/ClickHouse/pull/27882) ([Azat Khuzhin](https://github.com/azat)).
+* Improve remote query cancellation (in case the remote server terminated abnormally). [#27881](https://github.com/ClickHouse/ClickHouse/pull/27881) ([Azat Khuzhin](https://github.com/azat)).
+* Use multipart copy upload for large S3 objects. [#27858](https://github.com/ClickHouse/ClickHouse/pull/27858) ([ianton-ru](https://github.com/ianton-ru)).
+* Allow symlink traversal for the library dictionary path. [#27815](https://github.com/ClickHouse/ClickHouse/pull/27815) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Now `ALTER MODIFY COLUMN` from `T` to `Nullable(T)` doesn't require a mutation. [#27787](https://github.com/ClickHouse/ClickHouse/pull/27787) ([victorgao](https://github.com/kafka1991)).
+* Don't silently ignore errors and don't count delays in `ReadBufferFromS3`. [#27484](https://github.com/ClickHouse/ClickHouse/pull/27484) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Improve `ALTER ... MATERIALIZE TTL` by recalculating only the metadata, without the actual TTL action. [#27019](https://github.com/ClickHouse/ClickHouse/pull/27019) ([lthaooo](https://github.com/lthaooo)).
+* Allow reading the list of custom top level domains without a new line at EOF. [#28213](https://github.com/ClickHouse/ClickHouse/pull/28213) ([Azat Khuzhin](https://github.com/azat)).
+
+#### Bug Fix
+
+* Fix cases when reading compressed data from `carbon-clickhouse` fails with 'attempt to read after end of file'. Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([FArthur-cmd](https://github.com/FArthur-cmd)).
+* Fix checking access grants when executing a `GRANT WITH REPLACE` statement with an `ON CLUSTER` clause. This PR improves fix [#27001](https://github.com/ClickHouse/ClickHouse/pull/27701). [#27983](https://github.com/ClickHouse/ClickHouse/pull/27983) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Allow selecting with `extremes = 1` from a column of the type `LowCardinality(UUID)`. [#27918](https://github.com/ClickHouse/ClickHouse/pull/27918) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)).
+* After [#26864](https://github.com/ClickHouse/ClickHouse/pull/26864). Fix shutdown of `NamedSessionStorage`: session contexts stored in `NamedSessionStorage` are now destroyed before destroying the global context. [#27875](https://github.com/ClickHouse/ClickHouse/pull/27875) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Bugfix for `windowFunnel` "strict" mode. This fixes [#27469](https://github.com/ClickHouse/ClickHouse/issues/27469). [#27563](https://github.com/ClickHouse/ClickHouse/pull/27563) ([achimbab](https://github.com/achimbab)).
+* Fix infinite loop while reading truncated `bzip2` archive. [#28543](https://github.com/ClickHouse/ClickHouse/pull/28543) ([Azat Khuzhin](https://github.com/azat)).
+* Fix UUID overlap in `DROP TABLE` for internal DDL from `MaterializedMySQL`. MaterializedMySQL is an experimental feature. [#28533](https://github.com/ClickHouse/ClickHouse/pull/28533) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `There is no subcolumn` error while selecting from tables that have `Nested` columns and scalar columns with a dot in the name and the same prefix as `Nested` (e.g. `n.id UInt32, n.arr1 Array(UInt64), n.arr2 Array(UInt64)`).
[#28531](https://github.com/ClickHouse/ClickHouse/pull/28531) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix a bug which can lead to the error `Existing table metadata in ZooKeeper differs in sorting key expression.` after `ALTER` of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)).
+* Fixed possible ZooKeeper watches leak (minor issue) on background processing of the distributed DDL queue. Closes [#26036](https://github.com/ClickHouse/ClickHouse/issues/26036). [#28446](https://github.com/ClickHouse/ClickHouse/pull/28446) ([tavplubix](https://github.com/tavplubix)).
+* Fix missing quoting of table names in the `MaterializedPostgreSQL` engine. Closes [#28316](https://github.com/ClickHouse/ClickHouse/issues/28316). [#28433](https://github.com/ClickHouse/ClickHouse/pull/28433) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix the wrong behaviour of non-joined rows from a nullable column. Close [#27691](https://github.com/ClickHouse/ClickHouse/issues/27691). [#28349](https://github.com/ClickHouse/ClickHouse/pull/28349) ([vdimir](https://github.com/vdimir)).
+* Fix NOT-IN index optimization when not all key columns are used. This fixes [#28120](https://github.com/ClickHouse/ClickHouse/issues/28120). [#28315](https://github.com/ClickHouse/ClickHouse/pull/28315) ([Amos Bird](https://github.com/amosbird)).
+* Fix intersecting parts due to a new part having been replaced with an empty part. [#28310](https://github.com/ClickHouse/ClickHouse/pull/28310) ([Azat Khuzhin](https://github.com/azat)).
+* Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible read of uninitialized memory for queries with `Nullable(LowCardinality)` type and the setting `extremes` set to 1. Fixes [#28165](https://github.com/ClickHouse/ClickHouse/issues/28165). [#28205](https://github.com/ClickHouse/ClickHouse/pull/28205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Multiple small fixes for projections. See detailed description in the PR. [#28178](https://github.com/ClickHouse/ClickHouse/pull/28178) ([Amos Bird](https://github.com/amosbird)).
+* Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix handling null value with type of `Nullable(String)` in function `JSONExtract`. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930). This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . [#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)).
+* Multiple fixes for the new `clickhouse-keeper` tool. Fix a rare bug in `clickhouse-keeper` when the client can receive a watch response before the request-response. [#28197](https://github.com/ClickHouse/ClickHouse/pull/28197) ([alesapin](https://github.com/alesapin)). Fix incorrect behavior in `clickhouse-keeper` when list watches (`getChildren`) were triggered by `set` requests for children. [#28190](https://github.com/ClickHouse/ClickHouse/pull/28190) ([alesapin](https://github.com/alesapin)).
Fix a rare case when changes of `clickhouse-keeper` settings may lead to lost logs and a server hang. [#28360](https://github.com/ClickHouse/ClickHouse/pull/28360) ([alesapin](https://github.com/alesapin)). Fix a bug in `clickhouse-keeper` which can lead to endless logs when `rotate_logs_interval` is decreased. [#28152](https://github.com/ClickHouse/ClickHouse/pull/28152) ([alesapin](https://github.com/alesapin)).
+
+#### Build/Testing/Packaging Improvement
+
+* Enable Thread Fuzzer in Stress Test. Thread Fuzzer is a ClickHouse feature that allows testing more permutations of thread scheduling and discovering more potential issues. This closes [#9813](https://github.com/ClickHouse/ClickHouse/issues/9813). This closes [#9814](https://github.com/ClickHouse/ClickHouse/issues/9814). This closes [#9515](https://github.com/ClickHouse/ClickHouse/issues/9515). This closes [#9516](https://github.com/ClickHouse/ClickHouse/issues/9516). [#27538](https://github.com/ClickHouse/ClickHouse/pull/27538) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add new log level `test` for testing environments. It is even more verbose than the default `trace`. [#28559](https://github.com/ClickHouse/ClickHouse/pull/28559) ([alesapin](https://github.com/alesapin)).
+* Print out git status information at CMake configure stage. [#28047](https://github.com/ClickHouse/ClickHouse/pull/28047) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
+* Temporarily switched the ubuntu apt repository to mirror ru.archive.ubuntu.com, as the default one (archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)).
+
+
+
+### ClickHouse release v21.9, 2021-09-09
+
+#### Backward Incompatible Change
+
+* Do not output trailing zeros in text representation of `Decimal` types. Example: `1.23` will be printed instead of `1.230000` for a decimal with scale 6. This closes [#15794](https://github.com/ClickHouse/ClickHouse/issues/15794). It may introduce slight incompatibility if your applications somehow relied on the trailing zeros. Serialization in output formats can be controlled with the setting `output_format_decimal_trailing_zeros`. Implementation of `toString` and casting to String is changed unconditionally. [#27680](https://github.com/ClickHouse/ClickHouse/pull/27680) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Do not allow to apply parametric aggregate function with `-Merge` combinator to aggregate function state if the state was produced by an aggregate function with different parameters. For example, the state of `fooState(42)(x)` cannot be finalized with `fooMerge(s)` or `fooMerge(123)(s)`; parameters must be specified explicitly like `fooMerge(42)(s)` and must be equal. It does not affect some special aggregate functions like `quantile` and `sequence*` that use parameters for finalization only. [#26847](https://github.com/ClickHouse/ClickHouse/pull/26847) ([tavplubix](https://github.com/tavplubix)).
+* Under clickhouse-local, always treat local addresses with a port as remote. [#26736](https://github.com/ClickHouse/ClickHouse/pull/26736) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix the issue that in case of some sophisticated query with column aliases identical to the names of expressions, a bad cast may happen. This fixes [#25447](https://github.com/ClickHouse/ClickHouse/issues/25447). This fixes [#26914](https://github.com/ClickHouse/ClickHouse/issues/26914).
This fix may introduce backward incompatibility: if there are different expressions with identical names, an exception will be thrown. It may break some rare cases when `enable_optimize_predicate_expression` is set. [#26639](https://github.com/ClickHouse/ClickHouse/pull/26639) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Now a scalar subquery always returns a `Nullable` result if its type can be `Nullable`. This is needed because in case of an empty subquery its result should be `Null`. Previously, it was possible to get an error about incompatible types (type deduction does not execute the scalar subquery, and it could use a not-nullable type). A scalar subquery with an empty result which can't be converted to `Nullable` (like `Array` or `Tuple`) now throws an error. Fixes [#25411](https://github.com/ClickHouse/ClickHouse/issues/25411). [#26423](https://github.com/ClickHouse/ClickHouse/pull/26423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Introduce syntax for here documents. Example `SELECT $doc$ VALUE $doc$`. [#26671](https://github.com/ClickHouse/ClickHouse/pull/26671) ([Maksim Kita](https://github.com/kitaisreal)). This change is backward incompatible if queries contain identifiers that contain `$` [#28768](https://github.com/ClickHouse/ClickHouse/issues/28768).
+* Now indices can handle Nullable types, including `isNull` and `isNotNull`. [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) and [#12455](https://github.com/ClickHouse/ClickHouse/pull/12455) ([Amos Bird](https://github.com/amosbird)) and [#27250](https://github.com/ClickHouse/ClickHouse/pull/27250) ([Azat Khuzhin](https://github.com/azat)). But this was done with on-disk format changes, and even though the new server can read old data, the old server cannot. Also, in case you have `MINMAX` data skipping indices, you may get the `Data after mutation/merge is not byte-identical` error, since the new index will have the `.idx2` extension while before it was `.idx`. That said, you should not delay updating all existing replicas in this case; otherwise, if an old replica (<21.9) downloads data from a new replica (21.9+), it will not be able to apply the index for the downloaded part.
+
+#### New Feature
+
+* Implementation of short-circuit function evaluation, closes [#12587](https://github.com/ClickHouse/ClickHouse/issues/12587). Add setting `short_circuit_function_evaluation` to configure short-circuit function evaluation. [#23367](https://github.com/ClickHouse/ClickHouse/pull/23367) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add support for INTERSECT, EXCEPT, ANY, ALL operators. [#24757](https://github.com/ClickHouse/ClickHouse/pull/24757) ([Kirill Ershov](https://github.com/zdikov)). ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add support for encryption at the virtual file system level (data encryption at rest) using the AES-CTR algorithm. [#24206](https://github.com/ClickHouse/ClickHouse/pull/24206) ([Latysheva Alexandra](https://github.com/alexelex)). ([Vitaly Baranov](https://github.com/vitlibar)) [#26733](https://github.com/ClickHouse/ClickHouse/pull/26733) [#26377](https://github.com/ClickHouse/ClickHouse/pull/26377) [#26465](https://github.com/ClickHouse/ClickHouse/pull/26465).
+* Added natural language processing (NLP) functions for tokenization, stemming, lemmatizing and search in synonym extensions. [#24997](https://github.com/ClickHouse/ClickHouse/pull/24997) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Added integration with the S2 geometry library.
[#24980](https://github.com/ClickHouse/ClickHouse/pull/24980) ([Andr0901](https://github.com/Andr0901)). ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add SQLite table engine, table function, database engine. [#24194](https://github.com/ClickHouse/ClickHouse/pull/24194) ([Arslan Gumerov](https://github.com/g-arslan)). ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added support for a custom query for `MySQL`, `PostgreSQL`, `ClickHouse`, `JDBC`, `Cassandra` dictionary source. Closes [#1270](https://github.com/ClickHouse/ClickHouse/issues/1270). [#26995](https://github.com/ClickHouse/ClickHouse/pull/26995) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add shared (replicated) storage of users, roles, row policies, quotas and settings profiles through ZooKeeper. [#27426](https://github.com/ClickHouse/ClickHouse/pull/27426) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Add compression for `INTO OUTFILE` that automatically chooses the compression algorithm (a round-trip sketch follows below). Closes [#3473](https://github.com/ClickHouse/ClickHouse/issues/3473). [#27134](https://github.com/ClickHouse/ClickHouse/pull/27134) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Add `INSERT ... FROM INFILE`, similar to `SELECT ... INTO OUTFILE`. [#27655](https://github.com/ClickHouse/ClickHouse/pull/27655) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Added `complex_key_range_hashed` dictionary. Closes [#22029](https://github.com/ClickHouse/ClickHouse/issues/22029). [#27629](https://github.com/ClickHouse/ClickHouse/pull/27629) ([Maksim Kita](https://github.com/kitaisreal)).
+* Support expressions in the JOIN ON section. Close [#21868](https://github.com/ClickHouse/ClickHouse/issues/21868). [#24420](https://github.com/ClickHouse/ClickHouse/pull/24420) ([Vladimir C](https://github.com/vdimir)).
+* When a client connects to the server, it receives information about all warnings that were already collected by the server. (It can be disabled by using the option `--no-warnings`.) Add `system.warnings` table to collect warnings about server configuration. [#26246](https://github.com/ClickHouse/ClickHouse/pull/26246) ([Filatenkov Artur](https://github.com/FArthur-cmd)). [#26282](https://github.com/ClickHouse/ClickHouse/pull/26282) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Allow using constant expressions from `WITH` and `SELECT` in aggregate function parameters. Close [#10945](https://github.com/ClickHouse/ClickHouse/issues/10945). [#27531](https://github.com/ClickHouse/ClickHouse/pull/27531) ([abel-cheng](https://github.com/abel-cheng)).
+* Add `tupleToNameValuePairs`, a function that turns a named tuple into an array of pairs. [#27505](https://github.com/ClickHouse/ClickHouse/pull/27505) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
+* Add support for `bzip2` compression method for import/export. Closes [#22428](https://github.com/ClickHouse/ClickHouse/issues/22428). [#27377](https://github.com/ClickHouse/ClickHouse/pull/27377) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Added `bitmapSubsetOffsetLimit(bitmap, offset, cardinality_limit)` function. It creates a subset of the bitmap, limiting the results to `cardinality_limit` starting at offset `offset`. [#27234](https://github.com/ClickHouse/ClickHouse/pull/27234) ([DHBin](https://github.com/DHBin)).
+* Add column `default_database` to `system.users`. [#27054](https://github.com/ClickHouse/ClickHouse/pull/27054) ([kevin wan](https://github.com/MaxWk)).
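+
+The `INTO OUTFILE` compression and `INSERT ... FROM INFILE` entries above pair naturally. A minimal round-trip sketch, assuming a hypothetical `events` table and that the codec is inferred from the `.gz` extension:
+
+```sql
+-- Export: the compression algorithm is chosen automatically from the file extension.
+SELECT * FROM events INTO OUTFILE 'events.csv.gz' FORMAT CSV;
+
+-- Import the same file back (assuming decompression is likewise inferred from the extension).
+INSERT INTO events FROM INFILE 'events.csv.gz' FORMAT CSV;
+```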
+* Supported `cluster` macros inside table functions `cluster` and `clusterAllReplicas`. [#26913](https://github.com/ClickHouse/ClickHouse/pull/26913) ([polyprogrammist](https://github.com/PolyProgrammist)).
+* Add new functions `currentRoles()`, `enabledRoles()`, `defaultRoles()`. [#26780](https://github.com/ClickHouse/ClickHouse/pull/26780) ([Vitaly Baranov](https://github.com/vitlibar)).
+* New functions `currentProfiles()`, `enabledProfiles()`, `defaultProfiles()`. [#26714](https://github.com/ClickHouse/ClickHouse/pull/26714) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add functions that return the `(initial_)query_id` of the current query. This closes [#23682](https://github.com/ClickHouse/ClickHouse/issues/23682). [#26410](https://github.com/ClickHouse/ClickHouse/pull/26410) ([Alexey Boykov](https://github.com/mathalex)).
+* Add `REPLACE GRANT` feature. [#26384](https://github.com/ClickHouse/ClickHouse/pull/26384) ([Caspian](https://github.com/Cas-pian)).
+* `EXPLAIN` query now has an `EXPLAIN ESTIMATE ...` mode that will show information about read rows, marks and parts from MergeTree tables (a usage sketch follows below). Closes [#23941](https://github.com/ClickHouse/ClickHouse/issues/23941). [#26131](https://github.com/ClickHouse/ClickHouse/pull/26131) ([fastio](https://github.com/fastio)).
+* Added `system.zookeeper_log` table. All actions of the ZooKeeper client are logged into this table. Implements [#25449](https://github.com/ClickHouse/ClickHouse/issues/25449). [#26129](https://github.com/ClickHouse/ClickHouse/pull/26129) ([tavplubix](https://github.com/tavplubix)).
+* Zero-copy replication for `ReplicatedMergeTree` over `HDFS` storage. [#25918](https://github.com/ClickHouse/ClickHouse/pull/25918) ([Zhichang Yu](https://github.com/yuzhichang)).
+* Allow to insert Nested type as array of structs in `Arrow`, `ORC` and `Parquet` input formats. [#25902](https://github.com/ClickHouse/ClickHouse/pull/25902) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add a new data type `Date32` (stored as Int32). It supports the same date range as `DateTime64`. Support loading Parquet date32 into ClickHouse `Date32`. Add a new function `toDate32`, similar to `toDate`. [#25774](https://github.com/ClickHouse/ClickHouse/pull/25774) ([LiuNeng](https://github.com/liuneng1994)).
+* Allow setting a default database for users. [#25268](https://github.com/ClickHouse/ClickHouse/issues/25268). [#25687](https://github.com/ClickHouse/ClickHouse/pull/25687) ([kevin wan](https://github.com/MaxWk)).
+* Add an optional parameter to the `MongoDB` engine to accept connection string options and support SSL connections. Closes [#21189](https://github.com/ClickHouse/ClickHouse/issues/21189). Closes [#21041](https://github.com/ClickHouse/ClickHouse/issues/21041). [#22045](https://github.com/ClickHouse/ClickHouse/pull/22045) ([Omar Bazaraa](https://github.com/OmarBazaraa)).
+
+#### Experimental Feature
+
+* Added a compression codec `AES_128_GCM_SIV` which encrypts columns instead of compressing them. [#19896](https://github.com/ClickHouse/ClickHouse/pull/19896) ([PHO](https://github.com/depressed-pho)). Will be rewritten, do not use.
+* Rename `MaterializeMySQL` to `MaterializedMySQL`. [#26822](https://github.com/ClickHouse/ClickHouse/pull/26822) ([tavplubix](https://github.com/tavplubix)).
+
+#### Performance Improvement
+
+* Improve the performance of fast queries when `max_execution_time = 0` by reducing the number of `clock_gettime` system calls. [#27325](https://github.com/ClickHouse/ClickHouse/pull/27325) ([filimonov](https://github.com/filimonov)).
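+
+A quick illustration of the `EXPLAIN ESTIMATE` mode listed under New Feature above; the `hits` table is hypothetical:
+
+```sql
+-- Reports estimated rows, marks, and parts to read from a MergeTree table,
+-- without executing the query itself.
+EXPLAIN ESTIMATE
+SELECT count()
+FROM hits
+WHERE EventDate >= '2021-09-01';
+```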
+* Specialize date-time-related comparison to achieve better performance. This fixes [#27083](https://github.com/ClickHouse/ClickHouse/issues/27083). [#27122](https://github.com/ClickHouse/ClickHouse/pull/27122) ([Amos Bird](https://github.com/amosbird)).
+* Share file descriptors in concurrent reads of the same files. There is no noticeable performance difference on Linux. But the number of opened files will be significantly (10..100 times) lower on typical servers and it makes operations easier. See [#26214](https://github.com/ClickHouse/ClickHouse/issues/26214). [#26768](https://github.com/ClickHouse/ClickHouse/pull/26768) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve latency of short queries that require reading from tables with a large number of columns. [#26371](https://github.com/ClickHouse/ClickHouse/pull/26371) ([Anton Popov](https://github.com/CurtizJ)).
+* Don't build sets for indices when analyzing a query. [#26365](https://github.com/ClickHouse/ClickHouse/pull/26365) ([Raúl Marín](https://github.com/Algunenano)).
+* Vectorize the SUM of Nullable integer types with native representation ([David Manzanares](https://github.com/davidmanzanares), [Raúl Marín](https://github.com/Algunenano)). [#26248](https://github.com/ClickHouse/ClickHouse/pull/26248) ([Raúl Marín](https://github.com/Algunenano)).
+* Compile expressions involving columns with `Enum` types. [#26237](https://github.com/ClickHouse/ClickHouse/pull/26237) ([Maksim Kita](https://github.com/kitaisreal)).
+* Compile aggregate functions `groupBitOr`, `groupBitAnd`, `groupBitXor`. [#26161](https://github.com/ClickHouse/ClickHouse/pull/26161) ([Maksim Kita](https://github.com/kitaisreal)).
+* Improved memory usage with better block size prediction when reading empty DEFAULT columns. Closes [#17317](https://github.com/ClickHouse/ClickHouse/issues/17317). [#25917](https://github.com/ClickHouse/ClickHouse/pull/25917) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Reduce memory usage and number of read rows in queries with `ORDER BY primary_key`. [#25721](https://github.com/ClickHouse/ClickHouse/pull/25721) ([Anton Popov](https://github.com/CurtizJ)).
+* Enable `distributed_push_down_limit` by default. [#27104](https://github.com/ClickHouse/ClickHouse/pull/27104) ([Azat Khuzhin](https://github.com/azat)).
+* Make `toTimeZone` monotonic when the time zone argument is a constant value, to support partition pruning in queries that filter on it. [#26261](https://github.com/ClickHouse/ClickHouse/pull/26261) ([huangzhaowei](https://github.com/SaintBacchus)).
+
+#### Improvement
+
+* Mark window functions as ready for general use. Remove the `allow_experimental_window_functions` setting. [#27184](https://github.com/ClickHouse/ClickHouse/pull/27184) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Improve compatibility with non-whole-minute timezone offsets. [#27080](https://github.com/ClickHouse/ClickHouse/pull/27080) ([Raúl Marín](https://github.com/Algunenano)).
+* If the file descriptor in a `File` table is a regular file, allow reading from it multiple times. This allows `clickhouse-local` to read multiple times from stdin (with multiple SELECT queries or subqueries) if stdin is a regular file, like `clickhouse-local --query "SELECT * FROM table UNION ALL SELECT * FROM table" ... < file`. This closes [#11124](https://github.com/ClickHouse/ClickHouse/issues/11124). Co-authored with ([alexey-milovidov](https://github.com/alexey-milovidov)).
[#25960](https://github.com/ClickHouse/ClickHouse/pull/25960) ([BoloniniD](https://github.com/BoloniniD)).
+* Remove duplicate index analysis and avoid possible invalid limit checks during projection analysis. [#27742](https://github.com/ClickHouse/ClickHouse/pull/27742) ([Amos Bird](https://github.com/amosbird)).
+* Enable query parameters to be passed in the body of HTTP requests. [#27706](https://github.com/ClickHouse/ClickHouse/pull/27706) ([Hermano Lustosa](https://github.com/hllustosa)).
+* Disallow `arrayJoin` on partition expressions. [#27648](https://github.com/ClickHouse/ClickHouse/pull/27648) ([Raúl Marín](https://github.com/Algunenano)).
+* Log the client IP address if authentication fails. [#27514](https://github.com/ClickHouse/ClickHouse/pull/27514) ([Misko Lee](https://github.com/imiskolee)).
+* Use bytes instead of strings for binary data in the GRPC protocol. [#27431](https://github.com/ClickHouse/ClickHouse/pull/27431) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Send a response with an error message if the HTTP port is not set and the user tries to send an HTTP request to the TCP port. [#27385](https://github.com/ClickHouse/ClickHouse/pull/27385) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)).
+* Add `_CAST` function for internal usage, which will not preserve type nullability; a non-internal cast will preserve it according to the setting `cast_keep_nullable` (a small sketch follows below). Closes [#12636](https://github.com/ClickHouse/ClickHouse/issues/12636). [#27382](https://github.com/ClickHouse/ClickHouse/pull/27382) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add setting `log_formatted_queries` to log an additional formatted query into `system.query_log`. It's useful for normalized query analysis because functions like `normalizeQuery` and `normalizeQueryKeepNames` don't parse/format queries in order to achieve better performance. [#27380](https://github.com/ClickHouse/ClickHouse/pull/27380) ([Amos Bird](https://github.com/amosbird)).
+* Add two settings `max_hyperscan_regexp_length` and `max_hyperscan_regexp_total_length` to prevent huge regexps being used in hyperscan-related functions, such as `multiMatchAny`. [#27378](https://github.com/ClickHouse/ClickHouse/pull/27378) ([Amos Bird](https://github.com/amosbird)).
+* Memory consumed by bitmap aggregate functions is now taken into account for memory limits. This closes [#26555](https://github.com/ClickHouse/ClickHouse/issues/26555). [#27252](https://github.com/ClickHouse/ClickHouse/pull/27252) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add a 10 seconds cache for the S3 proxy resolver. [#27216](https://github.com/ClickHouse/ClickHouse/pull/27216) ([ianton-ru](https://github.com/ianton-ru)).
+* Split the global mutex into individual mutexes for regexp construction. This helps avoid one huge regexp construction blocking other related threads. [#27211](https://github.com/ClickHouse/ClickHouse/pull/27211) ([Amos Bird](https://github.com/amosbird)).
+* Support schema for the PostgreSQL database engine. Closes [#27166](https://github.com/ClickHouse/ClickHouse/issues/27166). [#27198](https://github.com/ClickHouse/ClickHouse/pull/27198) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Track memory usage in clickhouse-client. [#27191](https://github.com/ClickHouse/ClickHouse/pull/27191) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Try recording `query_kind` in `system.query_log` even when the query fails to start. [#27182](https://github.com/ClickHouse/ClickHouse/pull/27182) ([Amos Bird](https://github.com/amosbird)).
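+
+A small sketch of the `cast_keep_nullable` behavior referenced in the `_CAST` entry above:
+
+```sql
+SET cast_keep_nullable = 1;
+SELECT toTypeName(CAST(toNullable(1) AS UInt8)); -- Nullable(UInt8): nullability is preserved
+
+SET cast_keep_nullable = 0;
+SELECT toTypeName(CAST(toNullable(1) AS UInt8)); -- UInt8: nullability is dropped
+```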
+* Added column `replica_is_active`, which maps replica names to their active status, to the `system.replicas` table. Closes [#27138](https://github.com/ClickHouse/ClickHouse/issues/27138). [#27180](https://github.com/ClickHouse/ClickHouse/pull/27180) ([Maksim Kita](https://github.com/kitaisreal)).
+* Allow to pass query settings via server URI in Web UI. [#27177](https://github.com/ClickHouse/ClickHouse/pull/27177) ([kolsys](https://github.com/kolsys)).
+* Add a new metric called `MaxPushedDDLEntryID`, which is the maximum DDL entry id that the current node has pushed to ZooKeeper. [#27174](https://github.com/ClickHouse/ClickHouse/pull/27174) ([Fuwang Hu](https://github.com/fuwhu)).
+* Improved the existence check and the empty-string node check when `clickhouse-keeper` creates znodes. [#27125](https://github.com/ClickHouse/ClickHouse/pull/27125) ([小路](https://github.com/nicelulu)).
+* Merge JOIN correctly handles an empty set on the right side. [#27078](https://github.com/ClickHouse/ClickHouse/pull/27078) ([Vladimir C](https://github.com/vdimir)).
+* Now functions can be shard-level constants, which means if a function is executed in the context of some distributed table, it generates a normal column; otherwise it produces a constant value. Notable functions are: `hostName()`, `tcpPort()`, `version()`, `buildId()`, `uptime()`, etc. [#27020](https://github.com/ClickHouse/ClickHouse/pull/27020) ([Amos Bird](https://github.com/amosbird)).
+* Updated `extractAllGroupsHorizontal` - the upper limit on the number of matches per row can be set via an optional third argument. [#26961](https://github.com/ClickHouse/ClickHouse/pull/26961) ([Vasily Nemkov](https://github.com/Enmk)).
+* Expose `RocksDB` statistics via the system.rocksdb table. Read rocksdb options from the ClickHouse config (`rocksdb...` keys). NOTE: ClickHouse does not rely on RocksDB, it is just one of the additional integration storage engines. [#26821](https://github.com/ClickHouse/ClickHouse/pull/26821) ([Azat Khuzhin](https://github.com/azat)).
+* Less verbose internal RocksDB logs. NOTE: ClickHouse does not rely on RocksDB, it is just one of the additional integration storage engines. This closes [#26252](https://github.com/ClickHouse/ClickHouse/issues/26252). [#26789](https://github.com/ClickHouse/ClickHouse/pull/26789) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Changing default roles affects new sessions only. [#26759](https://github.com/ClickHouse/ClickHouse/pull/26759) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Watchdog is disabled in docker by default. Fix for not handling ctrl+c. [#26757](https://github.com/ClickHouse/ClickHouse/pull/26757) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* `SET PROFILE` now applies constraints too, if they're set for the passed profile. [#26730](https://github.com/ClickHouse/ClickHouse/pull/26730) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Improve handling of `KILL QUERY` requests. [#26675](https://github.com/ClickHouse/ClickHouse/pull/26675) ([Raúl Marín](https://github.com/Algunenano)).
+* The `mapPopulateSeries` function supports the `Map` type. [#26663](https://github.com/ClickHouse/ClickHouse/pull/26663) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Fix excessive (x2) connect attempts with `skip_unavailable_shards`. [#26658](https://github.com/ClickHouse/ClickHouse/pull/26658) ([Azat Khuzhin](https://github.com/azat)).
+* Avoid hanging `clickhouse-benchmark` if the connection fails (e.g. on EMFILE).
[#26656](https://github.com/ClickHouse/ClickHouse/pull/26656) ([Azat Khuzhin](https://github.com/azat)).
+* Allow more threads to be used by the Kafka engine. [#26642](https://github.com/ClickHouse/ClickHouse/pull/26642) ([feihengye](https://github.com/feihengye)).
+* Add round-robin support for `clickhouse-benchmark` (it does not differ from the regular multi host/port run except for the statistics report). [#26607](https://github.com/ClickHouse/ClickHouse/pull/26607) ([Azat Khuzhin](https://github.com/azat)).
+* Executable dictionaries (`executable`, `executable_pool`) can now be created with a DDL query using `clickhouse-local`. Closes [#22355](https://github.com/ClickHouse/ClickHouse/issues/22355). [#26510](https://github.com/ClickHouse/ClickHouse/pull/26510) ([Maksim Kita](https://github.com/kitaisreal)).
+* Set client query kind for `mysql` and `postgresql` compatibility protocol handlers. [#26498](https://github.com/ClickHouse/ClickHouse/pull/26498) ([anneji-dev](https://github.com/anneji-dev)).
+* Apply `LIMIT` on the shards for queries like `SELECT * FROM dist ORDER BY key LIMIT 10` w/ `distributed_push_down_limit=1`. Avoid running `Distinct`/`LIMIT BY` steps for queries like `SELECT DISTINCT sharding_key FROM dist ORDER BY key`. Now `distributed_push_down_limit` is respected by the `optimize_distributed_group_by_sharding_key` optimization. [#26466](https://github.com/ClickHouse/ClickHouse/pull/26466) ([Azat Khuzhin](https://github.com/azat)).
+* Updated protobuf to 3.17.3. Changelogs are available on https://github.com/protocolbuffers/protobuf/releases. [#26424](https://github.com/ClickHouse/ClickHouse/pull/26424) ([Ilya Yatsishin](https://github.com/qoega)).
+* Enable the `use_hedged_requests` setting that allows mitigating tail latencies on large clusters. [#26380](https://github.com/ClickHouse/ClickHouse/pull/26380) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve behaviour with a non-existing host in the user's allowed host list. [#26368](https://github.com/ClickHouse/ClickHouse/pull/26368) ([ianton-ru](https://github.com/ianton-ru)).
+* Add ability to set `Distributed` directory monitor settings via CREATE TABLE (i.e. `CREATE TABLE dist (key Int) Engine=Distributed(cluster, db, table) SETTINGS monitor_batch_inserts=1` and similar). [#26336](https://github.com/ClickHouse/ClickHouse/pull/26336) ([Azat Khuzhin](https://github.com/azat)).
+* Save the server address in history URLs in the web UI if it differs from the origin of the web UI. This closes [#26044](https://github.com/ClickHouse/ClickHouse/issues/26044). [#26322](https://github.com/ClickHouse/ClickHouse/pull/26322) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add events to profile calls to `sleep` / `sleepEachRow`. [#26320](https://github.com/ClickHouse/ClickHouse/pull/26320) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow to reuse connections of shards among different clusters. This also avoids creating new connections when using the `cluster` table function. [#26318](https://github.com/ClickHouse/ClickHouse/pull/26318) ([Amos Bird](https://github.com/amosbird)).
+* Control the execution period of clearing old temporary directories via a parameter with a default value. [#26212](https://github.com/ClickHouse/ClickHouse/issues/26212). [#26313](https://github.com/ClickHouse/ClickHouse/pull/26313) ([fastio](https://github.com/fastio)).
+* Add a setting `function_range_max_elements_in_block` to tune the safety threshold for the data volume generated by function `range`.
This closes [#26303](https://github.com/ClickHouse/ClickHouse/issues/26303). [#26305](https://github.com/ClickHouse/ClickHouse/pull/26305) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Check the hash function at table creation, not at sampling time. Add a setting for MergeTree: if someone created a table with an incorrect sampling column but sampling is never used, this setting can be disabled to start the server without an exception. [#26256](https://github.com/ClickHouse/ClickHouse/pull/26256) ([zhaoyu](https://github.com/zxc111)).
+* Added `output_format_avro_string_column_pattern` setting to put specified String columns to Avro as string instead of default bytes. Implements [#22414](https://github.com/ClickHouse/ClickHouse/issues/22414). [#26245](https://github.com/ClickHouse/ClickHouse/pull/26245) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add information about column sizes in the `system.columns` table for `Log` and `TinyLog` tables. This closes [#9001](https://github.com/ClickHouse/ClickHouse/issues/9001). [#26241](https://github.com/ClickHouse/ClickHouse/pull/26241) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Don't throw an exception when querying the `system.detached_parts` table if there is a custom disk configuration and the `detached` directory does not exist on some disks. This closes [#26078](https://github.com/ClickHouse/ClickHouse/issues/26078). [#26236](https://github.com/ClickHouse/ClickHouse/pull/26236) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Check for non-deterministic functions in keys, including constant expressions like `now()`, `today()`. This closes [#25875](https://github.com/ClickHouse/ClickHouse/issues/25875). This closes [#11333](https://github.com/ClickHouse/ClickHouse/issues/11333). [#26235](https://github.com/ClickHouse/ClickHouse/pull/26235) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Convert `timestamp` and `timestamptz` data types to `DateTime64` in the PostgreSQL table engine. [#26234](https://github.com/ClickHouse/ClickHouse/pull/26234) ([jasine](https://github.com/jasine)).
+* Apply aggressive IN index analysis for projections so that a better projection candidate can be selected. [#26218](https://github.com/ClickHouse/ClickHouse/pull/26218) ([Amos Bird](https://github.com/amosbird)).
+* Remove the GLOBAL keyword for IN when a scalar function is passed. In previous versions, if a user specified `GLOBAL IN f(x)`, an exception was thrown. [#26217](https://github.com/ClickHouse/ClickHouse/pull/26217) ([Amos Bird](https://github.com/amosbird)).
+* Add error id (like `BAD_ARGUMENTS`) to exception messages. This closes [#25862](https://github.com/ClickHouse/ClickHouse/issues/25862). [#26172](https://github.com/ClickHouse/ClickHouse/pull/26172) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect output with the --progress option for clickhouse-local. The progress bar will be cleared once it gets to 100% - same as is done for clickhouse-client. Closes [#17484](https://github.com/ClickHouse/ClickHouse/issues/17484). [#26128](https://github.com/ClickHouse/ClickHouse/pull/26128) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add `merge_selecting_sleep_ms` setting. [#26120](https://github.com/ClickHouse/ClickHouse/pull/26120) ([lthaooo](https://github.com/lthaooo)).
+* Remove complicated usage of Linux AIO with one-block readahead and replace it with plain simple synchronous IO with O_DIRECT. In previous versions, the setting `min_bytes_to_use_direct_io` might not work correctly if `max_threads` is greater than one.
Reading with direct IO (that is disabled by default for queries and enabled by default for large merges) will work in a less efficient way. This closes [#25997](https://github.com/ClickHouse/ClickHouse/issues/25997). [#26003](https://github.com/ClickHouse/ClickHouse/pull/26003) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Flush `Distributed` table on `REPLACE TABLE` query. Resolves [#24566](https://github.com/ClickHouse/ClickHouse/issues/24566). Do not replace (or create) a table on a `[CREATE OR] REPLACE TABLE ... AS SELECT` query if insertion into the new table fails. Resolves [#23175](https://github.com/ClickHouse/ClickHouse/issues/23175). [#25895](https://github.com/ClickHouse/ClickHouse/pull/25895) ([tavplubix](https://github.com/tavplubix)).
+* Add a `views` column to `system.query_log` containing the names of the (materialized or live) views executed by the query. Adds a new log table (`system.query_views_log`) that contains information about each view executed during a query (a query sketch follows below). Modifies view execution: when an exception is thrown while executing a view, any view that has already started will continue running until it finishes. This used to be the behaviour under `parallel_view_processing=true` and now it's always the same behaviour. Dependent views now report reading progress to the context. [#25714](https://github.com/ClickHouse/ClickHouse/pull/25714) ([Raúl Marín](https://github.com/Algunenano)).
+* Do connection draining asynchronously upon finishing executing distributed queries. A new server setting `max_threads_for_connection_collector` specifies the number of workers that recycle connections in the background. If the pool is full, the connection is drained synchronously, but a bit differently than before: it is drained after we send EOS to the client, the query succeeds immediately after receiving enough data, and any exception is logged instead of being thrown to the client. Added setting `drain_timeout` (3 seconds by default). Connection draining will disconnect upon timeout. [#25674](https://github.com/ClickHouse/ClickHouse/pull/25674) ([Amos Bird](https://github.com/amosbird)).
+* Support for multiple includes in configuration. It is possible to include users configuration, remote servers configuration from multiple sources. Simply place `` element with `from_zk`, `from_env` or `incl` attribute and it will be replaced with the substitution. [#24404](https://github.com/ClickHouse/ClickHouse/pull/24404) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix multiple block insertion into a distributed table with `insert_distributed_one_random_shard = 1`. This is a marginal feature. Mark as improvement. [#23140](https://github.com/ClickHouse/ClickHouse/pull/23140) ([Amos Bird](https://github.com/amosbird)).
+* Support `LowCardinality` and `FixedString` keys/values for the `Map` type. [#21543](https://github.com/ClickHouse/ClickHouse/pull/21543) ([hexiaoting](https://github.com/hexiaoting)).
+* Enable reloading of local disk config. [#19526](https://github.com/ClickHouse/ClickHouse/pull/19526) ([taiyang-li](https://github.com/taiyang-li)).
+
+#### Bug Fix
+
+* Fix a couple of bugs that may cause replicas to diverge. [#27808](https://github.com/ClickHouse/ClickHouse/pull/27808) ([tavplubix](https://github.com/tavplubix)).
+* Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)).
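+
+A sketch of inspecting the `system.query_views_log` table from the Improvement entry above; the column names are taken from the documentation of that table and should be treated as an assumption here, and the query id is a placeholder:
+
+```sql
+-- Per-view execution details for a single query;
+-- requires query_views_log to be enabled in the server config.
+SELECT view_name, view_type, status, read_rows, written_rows
+FROM system.query_views_log
+WHERE initial_query_id = '<query-id>';
+```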
+* Prevent crashes for some formats when a NULL (tombstone) message comes from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)).
+* Fix column filtering with `UNION DISTINCT` in a subquery. Closes [#27578](https://github.com/ClickHouse/ClickHouse/issues/27578). [#27689](https://github.com/ClickHouse/ClickHouse/pull/27689) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix bad type cast when functions like `arrayHas` are applied to arrays of `LowCardinality` of `Nullable` of different non-numeric types like `DateTime` and `DateTime64`. In previous versions a bad cast occurred. In the new version it will lead to an exception. This closes [#26330](https://github.com/ClickHouse/ClickHouse/issues/26330). [#27682](https://github.com/ClickHouse/ClickHouse/pull/27682) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix the `postgresql` table function leaving connections unclosed. Closes [#26088](https://github.com/ClickHouse/ClickHouse/issues/26088). [#27662](https://github.com/ClickHouse/ClickHouse/pull/27662) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed another case of `Unexpected merged part ... intersecting drop range ...` error. [#27656](https://github.com/ClickHouse/ClickHouse/pull/27656) ([tavplubix](https://github.com/tavplubix)).
+* Fix an error with an aliased column in a `Distributed` table. [#27652](https://github.com/ClickHouse/ClickHouse/pull/27652) ([Vladimir C](https://github.com/vdimir)).
+* After setting `max_memory_usage*` to a non-zero value it was not possible to reset it back to 0 (unlimited). It's fixed (see the example below). [#27638](https://github.com/ClickHouse/ClickHouse/pull/27638) ([tavplubix](https://github.com/tavplubix)).
+* Fixed underflow of the time value when constructing it from components. Closes [#27193](https://github.com/ClickHouse/ClickHouse/issues/27193). [#27605](https://github.com/ClickHouse/ClickHouse/pull/27605) ([Vasily Nemkov](https://github.com/Enmk)).
+* Fix crash during projection materialization when some parts contain missing columns. This fixes [#27512](https://github.com/ClickHouse/ClickHouse/issues/27512). [#27528](https://github.com/ClickHouse/ClickHouse/pull/27528) ([Amos Bird](https://github.com/amosbird)).
+* Fix the mistyped metric `BackgroundMessageBrokerSchedulePoolTask`. [#27452](https://github.com/ClickHouse/ClickHouse/pull/27452) ([Ben](https://github.com/benbiti)).
+* Fix distributed queries with zero shards and aggregation. [#27427](https://github.com/ClickHouse/ClickHouse/pull/27427) ([Azat Khuzhin](https://github.com/azat)).
+* Fix compatibility for the case when `/proc/meminfo` does not contain the KB suffix. [#27361](https://github.com/ClickHouse/ClickHouse/pull/27361) ([Mike Kot](https://github.com/myrrc)).
+* Fix incorrect result for a query with row-level security, `PREWHERE` and a `LowCardinality` filter. Fixes [#27179](https://github.com/ClickHouse/ClickHouse/issues/27179). [#27329](https://github.com/ClickHouse/ClickHouse/pull/27329) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed incorrect validation of partition id for MergeTree tables that were created with the old syntax. [#27328](https://github.com/ClickHouse/ClickHouse/pull/27328) ([tavplubix](https://github.com/tavplubix)).
+* Fix MySQL protocol when using parallel formats (CSV / TSV). [#27326](https://github.com/ClickHouse/ClickHouse/pull/27326) ([Raúl Marín](https://github.com/Algunenano)).
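+
+A minimal sketch of the `max_memory_usage` reset scenario fixed above:
+
+```sql
+SET max_memory_usage = 10000000000; -- limit the session to ~10 GB
+-- Before the fix, resetting back to 0 (unlimited) did not take effect:
+SET max_memory_usage = 0;
+```
+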
+* Fix `Cannot find column` error for queries with sampling. Was introduced in [#24574](https://github.com/ClickHouse/ClickHouse/issues/24574). Fixes [#26522](https://github.com/ClickHouse/ClickHouse/issues/26522). [#27301](https://github.com/ClickHouse/ClickHouse/pull/27301) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix errors like `Expected ColumnLowCardinality, gotUInt8` or `Bad cast from type DB::ColumnVector to DB::ColumnLowCardinality` for some queries with `LowCardinality` in `PREWHERE`. And more importantly, fix the lack of whitespace in the error message. Fixes [#23515](https://github.com/ClickHouse/ClickHouse/issues/23515). [#27298](https://github.com/ClickHouse/ClickHouse/pull/27298) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix `distributed_group_by_no_merge = 2` with `distributed_push_down_limit = 1` or `optimize_distributed_group_by_sharding_key = 1` with `LIMIT BY` and `LIMIT OFFSET`. This is an obscure combination of settings that no one is using. [#27249](https://github.com/ClickHouse/ClickHouse/pull/27249) ([Azat Khuzhin](https://github.com/azat)).
+* Fix mutation stuck on invalid partitions in non-replicated MergeTree. [#27248](https://github.com/ClickHouse/ClickHouse/pull/27248) ([Azat Khuzhin](https://github.com/azat)).
+* In case of ambiguity, lambda functions prefer their arguments to other aliases or identifiers. [#27235](https://github.com/ClickHouse/ClickHouse/pull/27235) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix column structure in merge join, close [#27091](https://github.com/ClickHouse/ClickHouse/issues/27091). [#27217](https://github.com/ClickHouse/ClickHouse/pull/27217) ([Vladimir C](https://github.com/vdimir)).
+* In rare cases `system.detached_parts` table might contain incorrect information for some parts, it's fixed. Fixes [#27114](https://github.com/ClickHouse/ClickHouse/issues/27114). [#27183](https://github.com/ClickHouse/ClickHouse/pull/27183) ([tavplubix](https://github.com/tavplubix)).
+* Fix uninitialized memory in functions `multiSearch*` with an empty array, close [#27169](https://github.com/ClickHouse/ClickHouse/issues/27169). [#27181](https://github.com/ClickHouse/ClickHouse/pull/27181) ([Vladimir C](https://github.com/vdimir)).
+* Fix synchronization in GRPCServer. This PR fixes [#27024](https://github.com/ClickHouse/ClickHouse/issues/27024). [#27064](https://github.com/ClickHouse/ClickHouse/pull/27064) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fixed `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache` configuration parsing. Options `allow_read_expired_keys`, `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds` were not parsed for dictionaries with a non-`cache` type. [#27032](https://github.com/ClickHouse/ClickHouse/pull/27032) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix possible mutation being stuck due to a race with DROP_RANGE. [#27002](https://github.com/ClickHouse/ClickHouse/pull/27002) ([Azat Khuzhin](https://github.com/azat)).
+* Now the partition ID in queries like `ALTER TABLE ... PARTITION ID xxx` is validated for correctness. Fixes [#25718](https://github.com/ClickHouse/ClickHouse/issues/25718). [#26963](https://github.com/ClickHouse/ClickHouse/pull/26963) ([alesapin](https://github.com/alesapin)).
+* Fix "Unknown column name" error with multiple JOINs in some cases, close [#26899](https://github.com/ClickHouse/ClickHouse/issues/26899). [#26957](https://github.com/ClickHouse/ClickHouse/pull/26957) ([Vladimir C](https://github.com/vdimir)).
+* Fix reading of custom TLDs (processing stopped with a smaller buffer or a bigger file). [#26948](https://github.com/ClickHouse/ClickHouse/pull/26948) ([Azat Khuzhin](https://github.com/azat)).
+* Fix error `Missing columns: 'xxx'` when a `DEFAULT` column references another non-materialized column without a `DEFAULT` expression. Fixes [#26591](https://github.com/ClickHouse/ClickHouse/issues/26591). [#26900](https://github.com/ClickHouse/ClickHouse/pull/26900) ([alesapin](https://github.com/alesapin)).
+* Fix loading of dictionary keys in `library-bridge` for `library` dictionary source. [#26834](https://github.com/ClickHouse/ClickHouse/pull/26834) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Aggregate function parameters might be lost when applying some combinators, causing exceptions like `Conversion from AggregateFunction(topKArray, Array(String)) to AggregateFunction(topKArray(10), Array(String)) is not supported`. It's fixed. Fixes [#26196](https://github.com/ClickHouse/ClickHouse/issues/26196) and [#26433](https://github.com/ClickHouse/ClickHouse/issues/26433). [#26814](https://github.com/ClickHouse/ClickHouse/pull/26814) ([tavplubix](https://github.com/tavplubix)).
+* Add `event_time_microseconds` value for `REMOVE_PART` in `system.part_log`. In previous versions it was not set. [#26720](https://github.com/ClickHouse/ClickHouse/pull/26720) ([Azat Khuzhin](https://github.com/azat)).
+* Do not remove data on ReplicatedMergeTree table shutdown to avoid creating a data-to-metadata inconsistency. [#26716](https://github.com/ClickHouse/ClickHouse/pull/26716) ([nvartolomei](https://github.com/nvartolomei)).
+* Sometimes `SET ROLE` could work incorrectly, this PR fixes that. [#26707](https://github.com/ClickHouse/ClickHouse/pull/26707) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Some fixes for parallel formatting (see [#26694](https://github.com/ClickHouse/ClickHouse/issues/26694)). [#26703](https://github.com/ClickHouse/ClickHouse/pull/26703) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix potential nullptr dereference in window functions. This fixes [#25276](https://github.com/ClickHouse/ClickHouse/issues/25276). [#26668](https://github.com/ClickHouse/ClickHouse/pull/26668) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Fix clickhouse-client history file conversion (when upgrading from the format of a 3-year-old version of clickhouse-client) if the file is empty. [#26589](https://github.com/ClickHouse/ClickHouse/pull/26589) ([Azat Khuzhin](https://github.com/azat)).
+* Fix incorrect function names of `groupBitmapAnd/Or/Xor` (they could be displayed incorrectly on some occasions). [#26557](https://github.com/ClickHouse/ClickHouse/pull/26557) ([Amos Bird](https://github.com/amosbird)).
+* Update `chown` cmd check in the clickhouse-server docker entrypoint. It fixes the bug where cluster pod restart failed (or timed out) on Kubernetes. [#26545](https://github.com/ClickHouse/ClickHouse/pull/26545) ([Ky Li](https://github.com/Kylinrix)).
+* Fix crash in `RabbitMQ` shutdown in case `RabbitMQ` setup was not started. Closes [#26504](https://github.com/ClickHouse/ClickHouse/issues/26504). [#26529](https://github.com/ClickHouse/ClickHouse/pull/26529) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix issues with `CREATE DICTIONARY` query if the dictionary name or database name was quoted. Closes [#26491](https://github.com/ClickHouse/ClickHouse/issues/26491). [#26508](https://github.com/ClickHouse/ClickHouse/pull/26508) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix broken column name resolution after rewriting column aliases. This fixes [#26432](https://github.com/ClickHouse/ClickHouse/issues/26432). [#26475](https://github.com/ClickHouse/ClickHouse/pull/26475) ([Amos Bird](https://github.com/amosbird)).
+* Fix a fuzzed MSan crash. Fixes [#22517](https://github.com/ClickHouse/ClickHouse/issues/22517). [#26428](https://github.com/ClickHouse/ClickHouse/pull/26428) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix infinite non-joined block stream in `partial_merge_join`, close [#26325](https://github.com/ClickHouse/ClickHouse/issues/26325). [#26374](https://github.com/ClickHouse/ClickHouse/pull/26374) ([Vladimir C](https://github.com/vdimir)).
+* Fix possible crash when logging in as a dropped user. This PR fixes [#26073](https://github.com/ClickHouse/ClickHouse/issues/26073). [#26363](https://github.com/ClickHouse/ClickHouse/pull/26363) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix `optimize_distributed_group_by_sharding_key` for multiple columns (it led to an incorrect result with `optimize_skip_unused_shards=1`/`allow_nondeterministic_optimize_skip_unused_shards=1` and multiple columns in the sharding key expression). [#26353](https://github.com/ClickHouse/ClickHouse/pull/26353) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed a rare bug in lost replica recovery that may cause replicas to diverge. [#26321](https://github.com/ClickHouse/ClickHouse/pull/26321) ([tavplubix](https://github.com/tavplubix)).
+* Fix zstd decompression (for import/export in the zstd framing format, which is unrelated to table data) in case there are escape sequences at the end of the internal buffer. Closes [#26013](https://github.com/ClickHouse/ClickHouse/issues/26013). [#26314](https://github.com/ClickHouse/ClickHouse/pull/26314) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix logical error on join with totals, close [#26017](https://github.com/ClickHouse/ClickHouse/issues/26017). [#26250](https://github.com/ClickHouse/ClickHouse/pull/26250) ([Vladimir C](https://github.com/vdimir)).
+* Remove excessive newline in `thread_name` column in `system.stack_trace` table. This fixes [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix potential crash if more than one `untuple` expression is used. [#26179](https://github.com/ClickHouse/ClickHouse/pull/26179) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Don't throw an exception in `toString` for a Nullable Enum if the Enum does not have a value for zero (see the example below), close [#25806](https://github.com/ClickHouse/ClickHouse/issues/25806). [#26123](https://github.com/ClickHouse/ClickHouse/pull/26123) ([Vladimir C](https://github.com/vdimir)).
+* Fixed incorrect `sequence_id` in MySQL protocol packets that ClickHouse sends on an exception during query execution. It might cause the MySQL client to reset the connection to the ClickHouse server. Fixes [#21184](https://github.com/ClickHouse/ClickHouse/issues/21184). [#26051](https://github.com/ClickHouse/ClickHouse/pull/26051) ([tavplubix](https://github.com/tavplubix)).
+* Fix for the case that `cutToFirstSignificantSubdomainCustom()`/`cutToFirstSignificantSubdomainCustomWithWWW()`/`firstSignificantSubdomainCustom()` returns an incorrect type for consts, and hence `optimize_skip_unused_shards` does not work. [#26041](https://github.com/ClickHouse/ClickHouse/pull/26041) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible mismatched header when using a normal projection with `PREWHERE`. This fixes [#26020](https://github.com/ClickHouse/ClickHouse/issues/26020). [#26038](https://github.com/ClickHouse/ClickHouse/pull/26038) ([Amos Bird](https://github.com/amosbird)).
+* Fix `sharding_key` from a column without a function for `remote()` (before, `select * from remote('127.1', system.one, dummy)` led to an `Unknown column: dummy, there are only columns .` error). [#25824](https://github.com/ClickHouse/ClickHouse/pull/25824) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed `Not found column ...` and `Missing column ...` errors when selecting from `MaterializeMySQL`. Fixes [#23708](https://github.com/ClickHouse/ClickHouse/issues/23708), [#24830](https://github.com/ClickHouse/ClickHouse/issues/24830), [#25794](https://github.com/ClickHouse/ClickHouse/issues/25794). [#25822](https://github.com/ClickHouse/ClickHouse/pull/25822) ([tavplubix](https://github.com/tavplubix)).
+* Fix `optimize_skip_unused_shards_rewrite_in` for non-UInt64 types (it might eventually select incorrect shards or throw `Cannot infer type of an empty tuple` or `Function tuple requires at least one argument`). [#25798](https://github.com/ClickHouse/ClickHouse/pull/25798) ([Azat Khuzhin](https://github.com/azat)).
+
+#### Build/Testing/Packaging Improvement
+
+* Now we run stateful and stateless tests in random timezones. Fixes [#12439](https://github.com/ClickHouse/ClickHouse/issues/12439). Reading String as DateTime and writing DateTime as String in Protobuf format now respect the timezone. Reading UInt16 as DateTime in Arrow and Parquet formats now treats it as Date and then converts it to DateTime with respect to the DateTime's timezone, because Date is serialized in Arrow and Parquet as UInt16. GraphiteMergeTree now respects the time zone for rounding of times. Fixes [#5098](https://github.com/ClickHouse/ClickHouse/issues/5098). Author: @alexey-milovidov. [#15408](https://github.com/ClickHouse/ClickHouse/pull/15408) ([alesapin](https://github.com/alesapin)).
+* `clickhouse-test` supports SQL tests with [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/templates/#synopsis) templates. [#26579](https://github.com/ClickHouse/ClickHouse/pull/26579) ([Vladimir C](https://github.com/vdimir)).
+* Add support for building with `clang-13`. This closes [#27705](https://github.com/ClickHouse/ClickHouse/issues/27705). [#27714](https://github.com/ClickHouse/ClickHouse/pull/27714) ([alexey-milovidov](https://github.com/alexey-milovidov)). [#27777](https://github.com/ClickHouse/ClickHouse/pull/27777) ([Sergei Semin](https://github.com/syominsergey)).
+* Add CMake options to build with or without specific CPU instruction sets. This is for [#17469](https://github.com/ClickHouse/ClickHouse/issues/17469) and [#27509](https://github.com/ClickHouse/ClickHouse/issues/27509). [#27508](https://github.com/ClickHouse/ClickHouse/pull/27508) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix linking of auxiliary programs when using dynamic libraries. [#26958](https://github.com/ClickHouse/ClickHouse/pull/26958) ([Raúl Marín](https://github.com/Algunenano)).
+* Update RocksDB to `2021-07-16` master. [#26411](https://github.com/ClickHouse/ClickHouse/pull/26411) ([alexey-milovidov](https://github.com/alexey-milovidov)).
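+
+A small illustrative sketch of the Nullable Enum `toString` fix listed in the Bug Fix section above (the table name `enum_demo` is made up for this example):
+
+```sql
+-- The Enum deliberately has no value mapped to zero.
+CREATE TABLE enum_demo (e Nullable(Enum8('a' = 1, 'b' = 2))) ENGINE = Memory;
+INSERT INTO enum_demo VALUES ('a'), (NULL);
+-- Used to throw when the Enum had no value for zero; now returns 'a' and NULL.
+SELECT toString(e) FROM enum_demo;
+```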
+
+
+### ClickHouse release v21.8, 2021-08-12
+
+#### Upgrade Notes
+* The new version uses the `Map` data type for system log tables (`system.query_log`, `system.query_thread_log`, `system.processes`, `system.opentelemetry_span_log`). These tables will be auto-created with the new data types. Virtual columns are created to support old queries. Closes [#18698](https://github.com/ClickHouse/ClickHouse/issues/18698). [#23934](https://github.com/ClickHouse/ClickHouse/pull/23934), [#25773](https://github.com/ClickHouse/ClickHouse/pull/25773) ([hexiaoting](https://github.com/hexiaoting), [sundy-li](https://github.com/sundy-li), [Maksim Kita](https://github.com/kitaisreal)). If you want to *downgrade* from version 21.8 to older versions, you will need to clean up the system tables with logs manually. Look at `/var/lib/clickhouse/data/system/*_log`.
+
+#### New Features
+
+* Add support for a part of the SQL/JSON standard. [#24148](https://github.com/ClickHouse/ClickHouse/pull/24148) ([l1tsolaiki](https://github.com/l1tsolaiki), [Kseniia Sumarokova](https://github.com/kssenii)).
+* Collect common system metrics (in `system.asynchronous_metrics` and `system.asynchronous_metric_log`) on CPU usage, disk usage, memory usage, IO, network, files, load average, CPU frequencies, thermal sensors, EDAC counters, system uptime; also added metrics about the scheduling jitter and the time spent collecting the metrics. It works similarly to `atop`, but inside ClickHouse, and allows access to monitoring data even if you have no additional tools installed. Close [#9430](https://github.com/ClickHouse/ClickHouse/issues/9430). [#24416](https://github.com/ClickHouse/ClickHouse/pull/24416) ([alexey-milovidov](https://github.com/alexey-milovidov), [Yegor Levankov](https://github.com/elevankoff)).
+* Add MaterializedPostgreSQL table engine and database engine. This database engine allows replicating a whole database or any subset of database tables. [#20470](https://github.com/ClickHouse/ClickHouse/pull/20470) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add new functions `leftPad()`, `rightPad()`, `leftPadUTF8()`, `rightPadUTF8()`. [#26075](https://github.com/ClickHouse/ClickHouse/pull/26075) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add the `FIRST` keyword to the `ADD INDEX` command to be able to add the index at the beginning of the indices list. [#25904](https://github.com/ClickHouse/ClickHouse/pull/25904) ([xjewer](https://github.com/xjewer)).
+* Introduce `system.data_skipping_indices` table containing information about existing data skipping indices. Close [#7659](https://github.com/ClickHouse/ClickHouse/issues/7659). [#25693](https://github.com/ClickHouse/ClickHouse/pull/25693) ([Dmitry Novik](https://github.com/novikd)).
+* Add `bin`/`unbin` functions. [#25609](https://github.com/ClickHouse/ClickHouse/pull/25609) ([zhaoyu](https://github.com/zxc111)).
+* Support `Map` and `UInt128`, `Int128`, `UInt256`, `Int256` types in `mapAdd` and `mapSubtract` functions. [#25596](https://github.com/ClickHouse/ClickHouse/pull/25596) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Support `DISTINCT ON (columns)` expression (see the example below), close [#25404](https://github.com/ClickHouse/ClickHouse/issues/25404). [#25589](https://github.com/ClickHouse/ClickHouse/pull/25589) ([Zijie Lu](https://github.com/TszKitLo40)).
+* Add an ability to reset a custom setting to default and remove it from the table's metadata. It allows rolling back the change without knowing the system/config's default. Closes [#14449](https://github.com/ClickHouse/ClickHouse/issues/14449). [#17769](https://github.com/ClickHouse/ClickHouse/pull/17769) ([xjewer](https://github.com/xjewer)).
+* Render pipelines as graphs in the Web UI if an `EXPLAIN PIPELINE graph = 1` query is submitted. [#26067](https://github.com/ClickHouse/ClickHouse/pull/26067) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+
+#### Performance Improvements
+
+* Compile aggregate functions. Use option `compile_aggregate_expressions` to enable it. [#24789](https://github.com/ClickHouse/ClickHouse/pull/24789) ([Maksim Kita](https://github.com/kitaisreal)).
+* Improve latency of short queries that require reading from tables with many columns. [#26371](https://github.com/ClickHouse/ClickHouse/pull/26371) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Improvements
+
+* Use `Map` data type for system log tables (`system.query_log`, `system.query_thread_log`, `system.processes`, `system.opentelemetry_span_log`). These tables will be auto-created with the new data types. Virtual columns are created to support old queries. Closes [#18698](https://github.com/ClickHouse/ClickHouse/issues/18698). [#23934](https://github.com/ClickHouse/ClickHouse/pull/23934), [#25773](https://github.com/ClickHouse/ClickHouse/pull/25773) ([hexiaoting](https://github.com/hexiaoting), [sundy-li](https://github.com/sundy-li), [Maksim Kita](https://github.com/kitaisreal)).
+* For a dictionary with a complex key containing only one attribute, allow not wrapping the key expression in a tuple for functions `dictGet`, `dictHas`. [#26130](https://github.com/ClickHouse/ClickHouse/pull/26130) ([Maksim Kita](https://github.com/kitaisreal)).
+* Implement functions `bin`/`hex` for `AggregateFunction` states. [#26094](https://github.com/ClickHouse/ClickHouse/pull/26094) ([zhaoyu](https://github.com/zxc111)).
+* Support arguments of `UUID` type for the `empty` and `notEmpty` functions. A `UUID` is empty if it is all zeros (the nil UUID). Closes [#3446](https://github.com/ClickHouse/ClickHouse/issues/3446). [#25974](https://github.com/ClickHouse/ClickHouse/pull/25974) ([zhaoyu](https://github.com/zxc111)).
+* Add support for `SET SQL_SELECT_LIMIT` in the MySQL protocol. Closes [#17115](https://github.com/ClickHouse/ClickHouse/issues/17115). [#25972](https://github.com/ClickHouse/ClickHouse/pull/25972) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* More instrumentation for network interaction: add counters for recv/send bytes; add gauges for recvs/sends. Added missing documentation. Close [#5897](https://github.com/ClickHouse/ClickHouse/issues/5897). [#25962](https://github.com/ClickHouse/ClickHouse/pull/25962) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add setting `optimize_move_to_prewhere_if_final`. If a query has `FINAL`, the optimization `move_to_prewhere` will be enabled only if both `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are enabled. Closes [#8684](https://github.com/ClickHouse/ClickHouse/issues/8684). [#25940](https://github.com/ClickHouse/ClickHouse/pull/25940) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow complex quoted identifiers of JOINed tables. Close [#17861](https://github.com/ClickHouse/ClickHouse/issues/17861). [#25924](https://github.com/ClickHouse/ClickHouse/pull/25924) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add support for Unicode (e.g. Chinese, Cyrillic) components in `Nested` data types. Close [#25594](https://github.com/ClickHouse/ClickHouse/issues/25594). [#25923](https://github.com/ClickHouse/ClickHouse/pull/25923) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow `quantiles*` functions to work with `aggregate_functions_null_for_empty`. Close [#25892](https://github.com/ClickHouse/ClickHouse/issues/25892). [#25919](https://github.com/ClickHouse/ClickHouse/pull/25919) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow parameters for parametric aggregate functions to be arbitrary constant expressions (e.g., `1 + 2`), not just literals (see the example below). It also allows using the query parameters (in parameterized queries like `{param:UInt8}`) inside parametric aggregate functions. Closes [#11607](https://github.com/ClickHouse/ClickHouse/issues/11607). [#25910](https://github.com/ClickHouse/ClickHouse/pull/25910) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Correctly throw the exception on the attempt to parse an invalid `Date`. Closes [#6481](https://github.com/ClickHouse/ClickHouse/issues/6481). [#25909](https://github.com/ClickHouse/ClickHouse/pull/25909) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support for multiple includes in configuration. It is possible to include users configuration, remote server configuration from multiple sources. Simply place an `<include>` element with a `from_zk`, `from_env` or `incl` attribute, and it will be replaced with the substitution. [#24404](https://github.com/ClickHouse/ClickHouse/pull/24404) ([nvartolomei](https://github.com/nvartolomei)).
+* Support for queries with a column named `"null"` (it must be specified in back-ticks or double quotes) and `ON CLUSTER`. Closes [#24035](https://github.com/ClickHouse/ClickHouse/issues/24035). [#25907](https://github.com/ClickHouse/ClickHouse/pull/25907) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support `LowCardinality`, `Decimal`, and `UUID` for `JSONExtract`. Closes [#24606](https://github.com/ClickHouse/ClickHouse/issues/24606). [#25900](https://github.com/ClickHouse/ClickHouse/pull/25900) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Convert the history file from `readline` format to `replxx` format. [#25888](https://github.com/ClickHouse/ClickHouse/pull/25888) ([Azat Khuzhin](https://github.com/azat)).
+* Fix an issue which can lead to intersecting parts after `DROP PART` or background deletion of an empty part. [#25884](https://github.com/ClickHouse/ClickHouse/pull/25884) ([alesapin](https://github.com/alesapin)).
+* Better handling of lost parts for `ReplicatedMergeTree` tables. Fixes rare inconsistencies in `ReplicationQueue`. Fixes [#10368](https://github.com/ClickHouse/ClickHouse/issues/10368). [#25820](https://github.com/ClickHouse/ClickHouse/pull/25820) ([alesapin](https://github.com/alesapin)).
+* Allow starting clickhouse-client with an unreadable working directory. [#25817](https://github.com/ClickHouse/ClickHouse/pull/25817) ([ianton-ru](https://github.com/ianton-ru)).
+* Fix "No available columns" error for `Merge` storage. [#25801](https://github.com/ClickHouse/ClickHouse/pull/25801) ([Azat Khuzhin](https://github.com/azat)).
+* MySQL Engine now supports the exchange of column comments between MySQL and ClickHouse. [#25795](https://github.com/ClickHouse/ClickHouse/pull/25795) ([Storozhuk Kostiantyn](https://github.com/sand6255)).
+* Fix inconsistent behaviour of `GROUP BY` constant on an empty set. Closes [#6842](https://github.com/ClickHouse/ClickHouse/issues/6842). [#25786](https://github.com/ClickHouse/ClickHouse/pull/25786) ([Kseniia Sumarokova](https://github.com/kssenii)).
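+
+Two of the entries above in one hedged sketch: `DISTINCT ON (columns)` and constant expressions as parameters of parametric aggregate functions (the table `events` and its columns are hypothetical):
+
+```sql
+-- One row per user_id, picking the first row according to ORDER BY.
+SELECT DISTINCT ON (user_id) user_id, event_time
+FROM events
+ORDER BY user_id, event_time DESC;
+
+-- Parameters may now be constant expressions, not just literals:
+SELECT topK(1 + 2)(number) FROM numbers(100);
+```
+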
+* Cancel already running merges in a partition on `DROP PARTITION` and `TRUNCATE` for `ReplicatedMergeTree`. Resolves [#17151](https://github.com/ClickHouse/ClickHouse/issues/17151). [#25684](https://github.com/ClickHouse/ClickHouse/pull/25684) ([tavplubix](https://github.com/tavplubix)).
+* Support `ENUM` data type for MaterializeMySQL. [#25676](https://github.com/ClickHouse/ClickHouse/pull/25676) ([Storozhuk Kostiantyn](https://github.com/sand6255)).
+* Support materialized and aliased columns in JOIN, close [#13274](https://github.com/ClickHouse/ClickHouse/issues/13274). [#25634](https://github.com/ClickHouse/ClickHouse/pull/25634) ([Vladimir C](https://github.com/vdimir)).
+* Fix a possible logical race condition between `ALTER TABLE ... DETACH` and background merges. [#25605](https://github.com/ClickHouse/ClickHouse/pull/25605) ([Azat Khuzhin](https://github.com/azat)).
+* Make the `NetworkReceiveElapsedMicroseconds` metric correctly include the time spent waiting for data from the client to `INSERT`. Close [#9958](https://github.com/ClickHouse/ClickHouse/issues/9958). [#25602](https://github.com/ClickHouse/ClickHouse/pull/25602) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support `TRUNCATE TABLE` for S3 and HDFS. Close [#25530](https://github.com/ClickHouse/ClickHouse/issues/25530). [#25550](https://github.com/ClickHouse/ClickHouse/pull/25550) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support for dynamic reloading of the config to change the number of threads in the pool for background job execution (merges, mutations, fetches). [#25548](https://github.com/ClickHouse/ClickHouse/pull/25548) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Allow extracting a non-string element as a string using `JSONExtract`. This is for [#25414](https://github.com/ClickHouse/ClickHouse/issues/25414). [#25452](https://github.com/ClickHouse/ClickHouse/pull/25452) ([Amos Bird](https://github.com/amosbird)).
+* Support a regular expression in the `Database` argument for `StorageMerge`. Close [#776](https://github.com/ClickHouse/ClickHouse/issues/776). [#25064](https://github.com/ClickHouse/ClickHouse/pull/25064) ([flynn](https://github.com/ucasfl)).
+* Web UI: if the value looks like a URL, automatically generate a link. [#25965](https://github.com/ClickHouse/ClickHouse/pull/25965) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Make `sudo service clickhouse-server start` work on systems with `systemd`, like CentOS 8. Close [#14298](https://github.com/ClickHouse/ClickHouse/issues/14298). Close [#17799](https://github.com/ClickHouse/ClickHouse/issues/17799). [#25921](https://github.com/ClickHouse/ClickHouse/pull/25921) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fixes
+
+* Fix incorrect `SET ROLE` in some cases. [#26707](https://github.com/ClickHouse/ClickHouse/pull/26707) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix potential `nullptr` dereference in window functions. Fix [#25276](https://github.com/ClickHouse/ClickHouse/issues/25276). [#26668](https://github.com/ClickHouse/ClickHouse/pull/26668) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Fix incorrect function names of `groupBitmapAnd/Or/Xor`. [#26557](https://github.com/ClickHouse/ClickHouse/pull/26557) ([Amos Bird](https://github.com/amosbird)).
+* Fix crash in RabbitMQ shutdown in case the RabbitMQ setup was not started. Closes [#26504](https://github.com/ClickHouse/ClickHouse/issues/26504). [#26529](https://github.com/ClickHouse/ClickHouse/pull/26529) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix issues with `CREATE DICTIONARY` query if the dictionary name or database name was quoted. Closes [#26491](https://github.com/ClickHouse/ClickHouse/issues/26491). [#26508](https://github.com/ClickHouse/ClickHouse/pull/26508) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix broken name resolution after rewriting column aliases. Fix [#26432](https://github.com/ClickHouse/ClickHouse/issues/26432). [#26475](https://github.com/ClickHouse/ClickHouse/pull/26475) ([Amos Bird](https://github.com/amosbird)).
+* Fix infinite non-joined block stream in `partial_merge_join`, close [#26325](https://github.com/ClickHouse/ClickHouse/issues/26325). [#26374](https://github.com/ClickHouse/ClickHouse/pull/26374) ([Vladimir C](https://github.com/vdimir)).
+* Fix possible crash when logging in as a dropped user. Fix [#26073](https://github.com/ClickHouse/ClickHouse/issues/26073). [#26363](https://github.com/ClickHouse/ClickHouse/pull/26363) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix `optimize_distributed_group_by_sharding_key` for multiple columns (it led to an incorrect result with `optimize_skip_unused_shards=1`/`allow_nondeterministic_optimize_skip_unused_shards=1` and multiple columns in the sharding key expression). [#26353](https://github.com/ClickHouse/ClickHouse/pull/26353) ([Azat Khuzhin](https://github.com/azat)).
+* `CAST` from `Date` to `DateTime` (or `DateTime64`) was not using the timezone of the `DateTime` type. It can also affect the comparison between `Date` and `DateTime`. Inference of the common type for `Date` and `DateTime` also was not using the corresponding timezone. It affected the results of the function `if` and array construction (see the example below). Closes [#24128](https://github.com/ClickHouse/ClickHouse/issues/24128). [#24129](https://github.com/ClickHouse/ClickHouse/pull/24129) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed a rare bug in lost replica recovery that may cause replicas to diverge. [#26321](https://github.com/ClickHouse/ClickHouse/pull/26321) ([tavplubix](https://github.com/tavplubix)).
+* Fix zstd decompression in case there are escape sequences at the end of the internal buffer. Closes [#26013](https://github.com/ClickHouse/ClickHouse/issues/26013). [#26314](https://github.com/ClickHouse/ClickHouse/pull/26314) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix logical error on join with totals, close [#26017](https://github.com/ClickHouse/ClickHouse/issues/26017). [#26250](https://github.com/ClickHouse/ClickHouse/pull/26250) ([Vladimir C](https://github.com/vdimir)).
+* Remove excessive newline in `thread_name` column in `system.stack_trace` table. Fix [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix `joinGet` with `LowCardinality` columns, close [#25993](https://github.com/ClickHouse/ClickHouse/issues/25993). [#26118](https://github.com/ClickHouse/ClickHouse/pull/26118) ([Vladimir C](https://github.com/vdimir)).
+* Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix throwing an exception when iterating over a non-existing remote directory. [#26087](https://github.com/ClickHouse/ClickHouse/pull/26087) ([ianton-ru](https://github.com/ianton-ru)).
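+
+A quick sketch of the timezone-aware `CAST` behavior from the entry above:
+
+```sql
+-- After the fix, the timezone of the target DateTime type is applied
+-- when casting from Date:
+SELECT CAST(toDate('2021-08-12') AS DateTime('UTC'));
+```
+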
+* Fix a rare server crash because of `abort` in the ZooKeeper client. Fixes [#25813](https://github.com/ClickHouse/ClickHouse/issues/25813). [#26079](https://github.com/ClickHouse/ClickHouse/pull/26079) ([alesapin](https://github.com/alesapin)).
+* Fix wrong thread count estimation for a right subquery join in some cases. Close [#24075](https://github.com/ClickHouse/ClickHouse/issues/24075). [#26052](https://github.com/ClickHouse/ClickHouse/pull/26052) ([Vladimir C](https://github.com/vdimir)).
+* Fixed incorrect `sequence_id` in MySQL protocol packets that ClickHouse sends on an exception during query execution. It might cause the MySQL client to reset the connection to the ClickHouse server. Fixes [#21184](https://github.com/ClickHouse/ClickHouse/issues/21184). [#26051](https://github.com/ClickHouse/ClickHouse/pull/26051) ([tavplubix](https://github.com/tavplubix)).
+* Fix possible mismatched header when using a normal projection with `PREWHERE`. Fix [#26020](https://github.com/ClickHouse/ClickHouse/issues/26020). [#26038](https://github.com/ClickHouse/ClickHouse/pull/26038) ([Amos Bird](https://github.com/amosbird)).
+* Fix formatting of type `Map` with integer keys to `JSON`. [#25982](https://github.com/ClickHouse/ClickHouse/pull/25982) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible deadlock during query profiler stack unwinding. Fix [#25968](https://github.com/ClickHouse/ClickHouse/issues/25968). [#25970](https://github.com/ClickHouse/ClickHouse/pull/25970) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix crash when calling `dictGet()` with bad arguments. [#25913](https://github.com/ClickHouse/ClickHouse/pull/25913) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fixed `scram-sha-256` authentication for PostgreSQL engines. Closes [#24516](https://github.com/ClickHouse/ClickHouse/issues/24516). [#25906](https://github.com/ClickHouse/ClickHouse/pull/25906) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix extremely long backoff for background tasks when the background pool is full. Fixes [#25836](https://github.com/ClickHouse/ClickHouse/issues/25836). [#25893](https://github.com/ClickHouse/ClickHouse/pull/25893) ([alesapin](https://github.com/alesapin)).
+* Fix ARM exception handling with a non-default page size. Fixes [#25512](https://github.com/ClickHouse/ClickHouse/issues/25512), [#25044](https://github.com/ClickHouse/ClickHouse/issues/25044), [#24901](https://github.com/ClickHouse/ClickHouse/issues/24901), [#23183](https://github.com/ClickHouse/ClickHouse/issues/23183), [#20221](https://github.com/ClickHouse/ClickHouse/issues/20221), [#19703](https://github.com/ClickHouse/ClickHouse/issues/19703), [#19028](https://github.com/ClickHouse/ClickHouse/issues/19028), [#18391](https://github.com/ClickHouse/ClickHouse/issues/18391), [#18121](https://github.com/ClickHouse/ClickHouse/issues/18121), [#17994](https://github.com/ClickHouse/ClickHouse/issues/17994), [#12483](https://github.com/ClickHouse/ClickHouse/issues/12483). [#25854](https://github.com/ClickHouse/ClickHouse/pull/25854) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix `sharding_key` from a column without a function for `remote()` (before, `select * from remote('127.1', system.one, dummy)` led to an `Unknown column: dummy, there are only columns .` error). [#25824](https://github.com/ClickHouse/ClickHouse/pull/25824) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed `Not found column ...` and `Missing column ...` errors when selecting from `MaterializeMySQL`. Fixes [#23708](https://github.com/ClickHouse/ClickHouse/issues/23708), [#24830](https://github.com/ClickHouse/ClickHouse/issues/24830), [#25794](https://github.com/ClickHouse/ClickHouse/issues/25794). [#25822](https://github.com/ClickHouse/ClickHouse/pull/25822) ([tavplubix](https://github.com/tavplubix)).
+* Fix `optimize_skip_unused_shards_rewrite_in` for non-UInt64 types (it might eventually select incorrect shards or throw `Cannot infer type of an empty tuple` or `Function tuple requires at least one argument`). [#25798](https://github.com/ClickHouse/ClickHouse/pull/25798) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a rare bug with the `DROP PART` query for `ReplicatedMergeTree` tables which could lead to the error message `Unexpected merged part intersecting drop range`. [#25783](https://github.com/ClickHouse/ClickHouse/pull/25783) ([alesapin](https://github.com/alesapin)).
+* Fix a bug in `TTL` with a `GROUP BY` expression which refused to execute the `TTL` after its first execution in a part. [#25743](https://github.com/ClickHouse/ClickHouse/pull/25743) ([alesapin](https://github.com/alesapin)).
+* Allow StorageMerge to access tables with aliases. Closes [#6051](https://github.com/ClickHouse/ClickHouse/issues/6051). [#25694](https://github.com/ClickHouse/ClickHouse/pull/25694) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix slow dictionary join in some cases, close [#24209](https://github.com/ClickHouse/ClickHouse/issues/24209). [#25618](https://github.com/ClickHouse/ClickHouse/pull/25618) ([Vladimir C](https://github.com/vdimir)).
+* Fix `ALTER MODIFY COLUMN` of columns that participate in TTL expressions. [#25554](https://github.com/ClickHouse/ClickHouse/pull/25554) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix assertion in `PREWHERE` with a non-UInt8 type, close [#19589](https://github.com/ClickHouse/ClickHouse/issues/19589). [#25484](https://github.com/ClickHouse/ClickHouse/pull/25484) ([Vladimir C](https://github.com/vdimir)).
+* Fix a fuzzed MSan crash. Fixes [#22517](https://github.com/ClickHouse/ClickHouse/issues/22517). [#26428](https://github.com/ClickHouse/ClickHouse/pull/26428) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Update the `chown` cmd check in the `clickhouse-server` docker entrypoint. It fixes the error 'cluster pod restart failed (or timed out)' on Kubernetes. [#26545](https://github.com/ClickHouse/ClickHouse/pull/26545) ([Ky Li](https://github.com/Kylinrix)).
+
+
+### ClickHouse release v21.7, 2021-07-09
+
+#### Backward Incompatible Change
+
+* Improved performance of queries with explicitly defined large sets. Added compatibility setting `legacy_column_name_of_tuple_literal`. It makes sense to set it to `true` while doing a rolling update of a cluster from a version lower than 21.7 to any higher version (see the sketch below). Otherwise distributed queries with explicitly defined sets in the `IN` clause may fail during the update. [#25371](https://github.com/ClickHouse/ClickHouse/pull/25371) ([Anton Popov](https://github.com/CurtizJ)).
+* Forward/backward incompatible change of the maximum buffer size in clickhouse-keeper (an experimental alternative to ZooKeeper). Better to do it now (before production) than later. [#25421](https://github.com/ClickHouse/ClickHouse/pull/25421) ([alesapin](https://github.com/alesapin)).
+
+#### New Feature
+
+* Support configuration in YAML format as an alternative to XML. This closes [#3607](https://github.com/ClickHouse/ClickHouse/issues/3607). [#21858](https://github.com/ClickHouse/ClickHouse/pull/21858) ([BoloniniD](https://github.com/BoloniniD)).
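+
+A hedged sketch of the compatibility knob from the backward-incompatible note above (typically set in the user profile; shown here as a session setting):
+
+```sql
+-- Keep the old tuple-literal column naming until every node runs 21.7+:
+SET legacy_column_name_of_tuple_literal = 1;
+```
+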
+* Provide a way to restore a replicated table when the data is (possibly) present, but the ZooKeeper metadata is lost. Resolves [#13458](https://github.com/ClickHouse/ClickHouse/issues/13458). [#13652](https://github.com/ClickHouse/ClickHouse/pull/13652) ([Mike Kot](https://github.com/myrrc)).
+* Support structs and maps in Arrow/Parquet/ORC and dictionaries in Arrow input/output formats. Adds a new setting `output_format_arrow_low_cardinality_as_dictionary`. [#24341](https://github.com/ClickHouse/ClickHouse/pull/24341) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added support for `Array` type in dictionaries. [#25119](https://github.com/ClickHouse/ClickHouse/pull/25119) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added function `bitPositionsToArray`. Closes [#23792](https://github.com/ClickHouse/ClickHouse/issues/23792). Author: Kevin Wan (@MaxWk). [#25394](https://github.com/ClickHouse/ClickHouse/pull/25394) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added function `dateName` to return names like 'Friday' or 'April' (see the example below). Author: Daniil Kondratyev (@dankondr). [#25372](https://github.com/ClickHouse/ClickHouse/pull/25372) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `toJSONString` function to serialize columns to their JSON representations. [#25164](https://github.com/ClickHouse/ClickHouse/pull/25164) ([Amos Bird](https://github.com/amosbird)).
+* Now `query_log` has two new columns, `initial_query_start_time` and `initial_query_start_time_microsecond`, that record the starting time of a distributed query if any. [#25022](https://github.com/ClickHouse/ClickHouse/pull/25022) ([Amos Bird](https://github.com/amosbird)).
+* Add aggregate function `segmentLengthSum`. [#24250](https://github.com/ClickHouse/ClickHouse/pull/24250) ([flynn](https://github.com/ucasfl)).
+* Add a new boolean setting `prefer_global_in_and_join` which makes all `IN`/`JOIN` operators behave as `GLOBAL IN`/`GLOBAL JOIN` by default. [#23434](https://github.com/ClickHouse/ClickHouse/pull/23434) ([Amos Bird](https://github.com/amosbird)).
+* Support `ALTER DELETE` queries for the `Join` table engine. [#23260](https://github.com/ClickHouse/ClickHouse/pull/23260) ([foolchi](https://github.com/foolchi)).
+* Add `quantileBFloat16` aggregate function as well as the corresponding `quantilesBFloat16` and `medianBFloat16`. It is a very simple and fast quantile estimator with a relative error of no more than 0.390625%. This closes [#16641](https://github.com/ClickHouse/ClickHouse/issues/16641). [#23204](https://github.com/ClickHouse/ClickHouse/pull/23204) ([Ivan Novitskiy](https://github.com/RedClusive)).
+* Implement the `sequenceNextNode()` function, useful for flow analysis. [#19766](https://github.com/ClickHouse/ClickHouse/pull/19766) ([achimbab](https://github.com/achimbab)).
+
+#### Experimental Feature
+
+* Add support for virtual filesystem over HDFS. [#11058](https://github.com/ClickHouse/ClickHouse/pull/11058) ([overshov](https://github.com/overshov)) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Now clickhouse-keeper (an experimental alternative to ZooKeeper) supports ZooKeeper-like `digest` ACLs. [#24448](https://github.com/ClickHouse/ClickHouse/pull/24448) ([alesapin](https://github.com/alesapin)).
+
+#### Performance Improvement
+
+* Added an optimization that transforms some functions into reading of subcolumns to reduce the amount of read data. E.g., the statement `col IS NULL` is transformed to reading the subcolumn `col.null`. The optimization can be enabled by the setting `optimize_functions_to_subcolumns`, which is currently off by default. [#24406](https://github.com/ClickHouse/ClickHouse/pull/24406) ([Anton Popov](https://github.com/CurtizJ)).
+* Rewrite more columns to possible alias expressions. This may enable better optimizations, such as projections. [#24405](https://github.com/ClickHouse/ClickHouse/pull/24405) ([Amos Bird](https://github.com/amosbird)).
+* Index of type `bloom_filter` can be used for expressions with the `hasAny` function with constant arrays. This closes [#24291](https://github.com/ClickHouse/ClickHouse/issues/24291). [#24900](https://github.com/ClickHouse/ClickHouse/pull/24900) ([Vasily Nemkov](https://github.com/Enmk)).
+* Add exponential backoff to reschedule the read attempt in case RabbitMQ queues are empty (ClickHouse has support for importing data from RabbitMQ). Closes [#24340](https://github.com/ClickHouse/ClickHouse/issues/24340). [#24415](https://github.com/ClickHouse/ClickHouse/pull/24415) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Improvement
+
+* Allow limiting bandwidth for replication. Add two Replicated\*MergeTree settings, `max_replicated_fetches_network_bandwidth` and `max_replicated_sends_network_bandwidth`, which allow limiting the maximum speed of replicated fetches/sends per table. Add two server-wide settings (in the `default` user profile), `max_replicated_fetches_network_bandwidth_for_server` and `max_replicated_sends_network_bandwidth_for_server`, which limit the maximum speed of replication for all tables. The settings are not followed perfectly accurately. Turned off by default. Fixes [#1821](https://github.com/ClickHouse/ClickHouse/issues/1821). [#24573](https://github.com/ClickHouse/ClickHouse/pull/24573) ([alesapin](https://github.com/alesapin)).
+* Resource constraints and isolation for ODBC and Library bridges. Use a separate `clickhouse-bridge` group and user for bridge processes. Set `oom_score_adj` so the bridges will be the first subjects for the OOM killer. Set the maximum RSS to 1 GiB. Closes [#23861](https://github.com/ClickHouse/ClickHouse/issues/23861). [#25280](https://github.com/ClickHouse/ClickHouse/pull/25280) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add a standalone `clickhouse-keeper` symlink to the main `clickhouse` binary. Now it's possible to run coordination without the main clickhouse server. [#24059](https://github.com/ClickHouse/ClickHouse/pull/24059) ([alesapin](https://github.com/alesapin)).
+* Use global settings for queries to `VIEW`. Fixed the behavior when queries to a `VIEW` used local settings, which led to errors if the settings of `CREATE VIEW` and `SELECT` were different. From now on, a `VIEW` won't use these modified settings, but you can still pass additional settings in the `SETTINGS` section of the `CREATE VIEW` query. Close [#20551](https://github.com/ClickHouse/ClickHouse/issues/20551). [#24095](https://github.com/ClickHouse/ClickHouse/pull/24095) ([Vladimir](https://github.com/vdimir)).
+* On server start, parts with an incorrect partition ID will never be removed, but always detached. [#25070](https://github.com/ClickHouse/ClickHouse/issues/25070). [#25166](https://github.com/ClickHouse/ClickHouse/pull/25166) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Increase the size of the background schedule pool to 128 (`background_schedule_pool_size` setting). It avoids the replication queue hanging on a slow ZooKeeper connection. [#25072](https://github.com/ClickHouse/ClickHouse/pull/25072) ([alesapin](https://github.com/alesapin)).
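+
+A short tour of some of the new functions introduced above (`dateName`, `bitPositionsToArray`, `toJSONString`); the expected results in the comments are illustrative:
+
+```sql
+SELECT
+    dateName('weekday', toDate('2021-07-09')) AS day_name, -- 'Friday'
+    bitPositionsToArray(42) AS bit_positions,              -- [1, 3, 5]
+    toJSONString(map('answer', 42)) AS json;               -- '{"answer":42}'
+```
+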
+* Add the MergeTree setting `max_parts_to_merge_at_once` which limits the number of parts that can be merged in the background at once. Doesn't affect the `OPTIMIZE FINAL` query. Fixes [#1820](https://github.com/ClickHouse/ClickHouse/issues/1820). [#24496](https://github.com/ClickHouse/ClickHouse/pull/24496) ([alesapin](https://github.com/alesapin)).
+* Allow the `NOT IN` operator to be used in partition pruning. [#24894](https://github.com/ClickHouse/ClickHouse/pull/24894) ([Amos Bird](https://github.com/amosbird)).
+* Recognize IPv4 addresses like `127.0.1.1` as local. This is controversial and closes [#23504](https://github.com/ClickHouse/ClickHouse/issues/23504). Michael Filimonov will test this feature. [#24316](https://github.com/ClickHouse/ClickHouse/pull/24316) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* A ClickHouse database created with MaterializeMySQL (it is an experimental feature) now contains all column comments from the materialized MySQL database. [#25199](https://github.com/ClickHouse/ClickHouse/pull/25199) ([Storozhuk Kostiantyn](https://github.com/sand6255)).
+* Add settings (`connection_auto_close`/`connection_max_tries`/`connection_pool_size`) for the MySQL storage engine. [#24146](https://github.com/ClickHouse/ClickHouse/pull/24146) ([Azat Khuzhin](https://github.com/azat)).
+* Improve startup time of the Distributed engine. [#25663](https://github.com/ClickHouse/ClickHouse/pull/25663) ([Azat Khuzhin](https://github.com/azat)).
+* Improvement for Distributed tables. Drop replicas from the dirname for `internal_replication=true` (allows INSERT into Distributed with a cluster of any number of replicas; before, only 15 replicas were supported, and anything more would fail with ENAMETOOLONG while creating the directory for async blocks). [#25513](https://github.com/ClickHouse/ClickHouse/pull/25513) ([Azat Khuzhin](https://github.com/azat)).
+* Added support of the `Interval` type for `LowCardinality`. It is needed for intermediate values of some expressions. Closes [#21730](https://github.com/ClickHouse/ClickHouse/issues/21730). [#25410](https://github.com/ClickHouse/ClickHouse/pull/25410) ([Vladimir](https://github.com/vdimir)).
+* Add the `==` operator on time conditions for the `sequenceMatch` and `sequenceCount` functions. E.g.: `sequenceMatch('(?1)(?t==1)(?2)')(time, data = 1, data = 2)`. [#25299](https://github.com/ClickHouse/ClickHouse/pull/25299) ([Christophe Kalenzaga](https://github.com/mga-chka)).
+* Add settings `http_max_fields`, `http_max_field_name_size`, `http_max_field_value_size`. [#25296](https://github.com/ClickHouse/ClickHouse/pull/25296) ([Ivan](https://github.com/abyss7)).
+* Add support for the function `if` with `Decimal` and `Int` types on its branches (see the example below). This closes [#20549](https://github.com/ClickHouse/ClickHouse/issues/20549). This closes [#10142](https://github.com/ClickHouse/ClickHouse/issues/10142). [#25283](https://github.com/ClickHouse/ClickHouse/pull/25283) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Update the prompt in `clickhouse-client` and display a message when reconnecting. This closes [#10577](https://github.com/ClickHouse/ClickHouse/issues/10577). [#25281](https://github.com/ClickHouse/ClickHouse/pull/25281) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Correct memory tracking in the aggregate function `topK`. This closes [#25259](https://github.com/ClickHouse/ClickHouse/issues/25259). [#25260](https://github.com/ClickHouse/ClickHouse/pull/25260) ([alexey-milovidov](https://github.com/alexey-milovidov)).
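+
+A minimal sketch of `if` with `Decimal` and `Int` branches, as enabled above; the common result type is expected to be a Decimal:
+
+```sql
+-- Previously this combination of branch types was rejected.
+SELECT if(number % 2 = 0, toDecimal32(1.5, 2), 1) AS v FROM numbers(4);
+```
+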
+* Fix `topLevelDomain` for IDN hosts (i.e. `example.рф`), before it returns empty string for such hosts. [#25103](https://github.com/ClickHouse/ClickHouse/pull/25103) ([Azat Khuzhin](https://github.com/azat)). +* Detect Linux kernel version at runtime (for worked nested epoll, that is required for `async_socket_for_remote`/`use_hedged_requests`, otherwise remote queries may stuck). [#25067](https://github.com/ClickHouse/ClickHouse/pull/25067) ([Azat Khuzhin](https://github.com/azat)). +* For distributed query, when `optimize_skip_unused_shards=1`, allow to skip shard with condition like `(sharding key) IN (one-element-tuple)`. (Tuples with many elements were supported. Tuple with single element did not work because it is parsed as literal). [#24930](https://github.com/ClickHouse/ClickHouse/pull/24930) ([Amos Bird](https://github.com/amosbird)). +* Improved log messages of S3 errors, no more double whitespaces in case of empty keys and buckets. [#24897](https://github.com/ClickHouse/ClickHouse/pull/24897) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Some queries require multi-pass semantic analysis. Try reusing built sets for `IN` in this case. [#24874](https://github.com/ClickHouse/ClickHouse/pull/24874) ([Amos Bird](https://github.com/amosbird)). +* Respect `max_distributed_connections` for `insert_distributed_sync` (otherwise for huge clusters and sync insert it may run out of `max_thread_pool_size`). [#24754](https://github.com/ClickHouse/ClickHouse/pull/24754) ([Azat Khuzhin](https://github.com/azat)). +* Avoid hiding errors like `Limit for rows or bytes to read exceeded` for scalar subqueries. [#24545](https://github.com/ClickHouse/ClickHouse/pull/24545) ([nvartolomei](https://github.com/nvartolomei)). +* Make String-to-Int parser stricter so that `toInt64('+')` will throw. [#24475](https://github.com/ClickHouse/ClickHouse/pull/24475) ([Amos Bird](https://github.com/amosbird)). +* If `SSD_CACHE` is created with DDL query, it can be created only inside `user_files` directory. [#24466](https://github.com/ClickHouse/ClickHouse/pull/24466) ([Maksim Kita](https://github.com/kitaisreal)). +* PostgreSQL support for specifying non default schema for insert queries. Closes [#24149](https://github.com/ClickHouse/ClickHouse/issues/24149). [#24413](https://github.com/ClickHouse/ClickHouse/pull/24413) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix IPv6 addresses resolving (i.e. fixes `select * from remote('[::1]', system.one)`). [#24319](https://github.com/ClickHouse/ClickHouse/pull/24319) ([Azat Khuzhin](https://github.com/azat)). +* Fix trailing whitespaces in FROM clause with subqueries in multiline mode, and also changes the output of the queries slightly in a more human friendly way. [#24151](https://github.com/ClickHouse/ClickHouse/pull/24151) ([Azat Khuzhin](https://github.com/azat)). +* Improvement for Distributed tables. Add ability to split distributed batch on failures (i.e. due to memory limits, corruptions), under `distributed_directory_monitor_split_batch_on_failure` (OFF by default). [#23864](https://github.com/ClickHouse/ClickHouse/pull/23864) ([Azat Khuzhin](https://github.com/azat)). +* Handle column name clashes for `Join` table engine. Closes [#20309](https://github.com/ClickHouse/ClickHouse/issues/20309). [#23769](https://github.com/ClickHouse/ClickHouse/pull/23769) ([Vladimir](https://github.com/vdimir)). +* Display progress for `File` table engine in `clickhouse-local` and on INSERT query in `clickhouse-client` when data is passed to stdin. 
Closes [#18209](https://github.com/ClickHouse/ClickHouse/issues/18209). [#23656](https://github.com/ClickHouse/ClickHouse/pull/23656) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Bugfixes and improvements of `clickhouse-copier`. Allow to copy tables with different (but compatible schemas). Closes [#9159](https://github.com/ClickHouse/ClickHouse/issues/9159). Added test to copy ReplacingMergeTree. Closes [#22711](https://github.com/ClickHouse/ClickHouse/issues/22711). Support TTL on columns and Data Skipping Indices. It simply removes it to create internal Distributed table (underlying table will have TTL and skipping indices). Closes [#19384](https://github.com/ClickHouse/ClickHouse/issues/19384). Allow to copy MATERIALIZED and ALIAS columns. There are some cases in which it could be helpful (e.g. if this column is in PRIMARY KEY). Now it could be allowed by setting `allow_to_copy_alias_and_materialized_columns` property to true in task configuration. Closes [#9177](https://github.com/ClickHouse/ClickHouse/issues/9177). Closes [#11007] (https://github.com/ClickHouse/ClickHouse/issues/11007). Closes [#9514](https://github.com/ClickHouse/ClickHouse/issues/9514). Added a property `allow_to_drop_target_partitions` in task configuration to drop partition in original table before moving helping tables. Closes [#20957](https://github.com/ClickHouse/ClickHouse/issues/20957). Get rid of `OPTIMIZE DEDUPLICATE` query. This hack was needed, because `ALTER TABLE MOVE PARTITION` was retried many times and plain MergeTree tables don't have deduplication. Closes [#17966](https://github.com/ClickHouse/ClickHouse/issues/17966). Write progress to ZooKeeper node on path `task_path + /status` in JSON format. Closes [#20955](https://github.com/ClickHouse/ClickHouse/issues/20955). Support for ReplicatedTables without arguments. Closes [#24834](https://github.com/ClickHouse/ClickHouse/issues/24834) .[#23518](https://github.com/ClickHouse/ClickHouse/pull/23518) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added sleep with backoff between read retries from S3. [#23461](https://github.com/ClickHouse/ClickHouse/pull/23461) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Respect `insert_allow_materialized_columns` (allows materialized columns) for INSERT into `Distributed` table. [#23349](https://github.com/ClickHouse/ClickHouse/pull/23349) ([Azat Khuzhin](https://github.com/azat)). +* Add ability to push down LIMIT for distributed queries. [#23027](https://github.com/ClickHouse/ClickHouse/pull/23027) ([Azat Khuzhin](https://github.com/azat)). +* Fix zero-copy replication with several S3 volumes (Fixes [#22679](https://github.com/ClickHouse/ClickHouse/issues/22679)). [#22864](https://github.com/ClickHouse/ClickHouse/pull/22864) ([ianton-ru](https://github.com/ianton-ru)). +* Resolve the actual port number bound when a user requests any available port from the operating system to show it in the log message. [#25569](https://github.com/ClickHouse/ClickHouse/pull/25569) ([bnaecker](https://github.com/bnaecker)). +* Fixed case, when sometimes conversion of postgres arrays resulted in String data type, not n-dimensional array, because `attndims` works incorrectly in some cases. Closes [#24804](https://github.com/ClickHouse/ClickHouse/issues/24804). [#25538](https://github.com/ClickHouse/ClickHouse/pull/25538) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix convertion of DateTime with timezone for MySQL, PostgreSQL, ODBC. 
+* Distinguish KILL MUTATION for different tables (fixes unexpected `Cancelled mutating parts` error). [#25025](https://github.com/ClickHouse/ClickHouse/pull/25025) ([Azat Khuzhin](https://github.com/azat)).
+* Allow to declare an S3 disk at the root of a bucket (S3 virtual filesystem is an experimental feature under development). [#24898](https://github.com/ClickHouse/ClickHouse/pull/24898) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Enable reading of subcolumns (e.g. components of Tuples) for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)).
+* A feature for MySQL compatibility protocol: make the `user` function return correct output. Closes [#25697](https://github.com/ClickHouse/ClickHouse/pull/25697). [#25697](https://github.com/ClickHouse/ClickHouse/pull/25697) ([sundyli](https://github.com/sundy-li)).
+
+#### Bug Fix
+
+* Improvement for backward compatibility. Use the old modulo function version when used in a partition key. Closes [#23508](https://github.com/ClickHouse/ClickHouse/issues/23508). [#24157](https://github.com/ClickHouse/ClickHouse/pull/24157) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix an extremely rare bug on low-memory servers which can lead to the inability to perform merges without a restart. Possibly fixes [#24603](https://github.com/ClickHouse/ClickHouse/issues/24603). [#24872](https://github.com/ClickHouse/ClickHouse/pull/24872) ([alesapin](https://github.com/alesapin)).
+* Fix an extremely rare error `Tagging already tagged part` in the replication queue during concurrent `alter move/replace partition`. Possibly fixes [#22142](https://github.com/ClickHouse/ClickHouse/issues/22142). [#24961](https://github.com/ClickHouse/ClickHouse/pull/24961) ([alesapin](https://github.com/alesapin)).
+* Fix potential crash when calculating aggregate function states by aggregation of aggregate function states of other aggregate functions (not a practical use case). See [#24523](https://github.com/ClickHouse/ClickHouse/issues/24523). [#25015](https://github.com/ClickHouse/ClickHouse/pull/25015) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed the behavior when the query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` does not finish. This was detected on a server with an extremely low amount of RAM. [#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix a bug which can lead to the ZooKeeper client hanging inside clickhouse-server. [#24721](https://github.com/ClickHouse/ClickHouse/pull/24721) ([alesapin](https://github.com/alesapin)).
+* If the ZooKeeper connection was lost and the replica was cloned after restoring the connection, its replication queue might contain outdated entries. Fixed a failed assertion when the replication queue contains intersecting virtual parts. It may rarely happen if some data part was lost. Print an error in the log instead of terminating. [#24777](https://github.com/ClickHouse/ClickHouse/pull/24777) ([tavplubix](https://github.com/tavplubix)).
+* Fix lost `WHERE` condition in expression-push-down optimization of query plan (setting `query_plan_filter_push_down = 1` by default). Fixes [#25368](https://github.com/ClickHouse/ClickHouse/issues/25368). [#25370](https://github.com/ClickHouse/ClickHouse/pull/25370) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a bug which can lead to intersecting parts after merges with TTL: `Part all_40_40_0 is covered by all_40_40_1 but should be merged into all_40_41_1. This shouldn't happen often`. [#25549](https://github.com/ClickHouse/ClickHouse/pull/25549) ([alesapin](https://github.com/alesapin)).
+* On ZooKeeper connection loss a `ReplicatedMergeTree` table might wait for background operations to complete before trying to reconnect. It's fixed, now background operations are stopped forcefully. [#25306](https://github.com/ClickHouse/ClickHouse/pull/25306) ([tavplubix](https://github.com/tavplubix)).
+* Fix error `Key expression contains comparison between inconvertible types` for queries with `ARRAY JOIN` in case an array is used in the primary key. Fixes [#8247](https://github.com/ClickHouse/ClickHouse/issues/8247). [#25546](https://github.com/ClickHouse/ClickHouse/pull/25546) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix wrong totals for a query with `WITH TOTALS` and `WITH FILL`. Fixes [#20872](https://github.com/ClickHouse/ClickHouse/issues/20872). [#25539](https://github.com/ClickHouse/ClickHouse/pull/25539) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix data race when querying `system.clusters` while reloading the cluster configuration at the same time. [#25737](https://github.com/ClickHouse/ClickHouse/pull/25737) ([Amos Bird](https://github.com/amosbird)).
+* Fixed `No such file or directory` error on moving a `Distributed` table between databases. Fixes [#24971](https://github.com/ClickHouse/ClickHouse/issues/24971). [#25667](https://github.com/ClickHouse/ClickHouse/pull/25667) ([tavplubix](https://github.com/tavplubix)).
+* `REPLACE PARTITION` might be ignored in rare cases if the source partition was empty. It's fixed. Fixes [#24869](https://github.com/ClickHouse/ClickHouse/issues/24869). [#25665](https://github.com/ClickHouse/ClickHouse/pull/25665) ([tavplubix](https://github.com/tavplubix)).
+* Fixed a bug in the `Replicated` database engine that might rarely cause some replica to skip an enqueued DDL query. [#24805](https://github.com/ClickHouse/ClickHouse/pull/24805) ([tavplubix](https://github.com/tavplubix)).
+* Fix null pointer dereference in `EXPLAIN AST` without a query. [#25631](https://github.com/ClickHouse/ClickHouse/pull/25631) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix waiting for automatic dropping of empty parts. It could lead to the background pool being completely filled and replication getting stuck. [#23315](https://github.com/ClickHouse/ClickHouse/pull/23315) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix restore of a table stored in the S3 virtual filesystem (it is an experimental feature not ready for production). [#25601](https://github.com/ClickHouse/ClickHouse/pull/25601) ([ianton-ru](https://github.com/ianton-ru)).
+* Fix nullptr dereference in `Arrow` format when using `Decimal256`. Add `Decimal256` support for `Arrow` format. [#25531](https://github.com/ClickHouse/ClickHouse/pull/25531) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix excessive underscore before the names of the preprocessed configuration files. [#25431](https://github.com/ClickHouse/ClickHouse/pull/25431) ([Vitaly Baranov](https://github.com/vitlibar)).
+* A fix for the `clickhouse-copier` tool: fix segfault when `sharding_key` is absent in the task config for copier. [#25419](https://github.com/ClickHouse/ClickHouse/pull/25419) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix `REPLACE` column transformer when used in DDL by correctly quoting the formatted query. This fixes [#23925](https://github.com/ClickHouse/ClickHouse/issues/23925). [#25391](https://github.com/ClickHouse/ClickHouse/pull/25391) ([Amos Bird](https://github.com/amosbird)).
+* Fix the possibility of non-deterministic behaviour of the `quantileDeterministic` function and similar. This closes [#20480](https://github.com/ClickHouse/ClickHouse/issues/20480). [#25313](https://github.com/ClickHouse/ClickHouse/pull/25313) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support `SimpleAggregateFunction(LowCardinality)` for `SummingMergeTree`. Fixes [#25134](https://github.com/ClickHouse/ClickHouse/issues/25134). [#25300](https://github.com/ClickHouse/ClickHouse/pull/25300) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix logical error with exception message "Cannot sum Array/Tuple in min/maxMap". [#25298](https://github.com/ClickHouse/ClickHouse/pull/25298) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix error `Bad cast from type DB::ColumnLowCardinality to DB::ColumnVector` for queries where a `LowCardinality` argument was used for `IN` (this bug appeared in 21.6). Fixes [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187). [#25290](https://github.com/ClickHouse/ClickHouse/pull/25290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix incorrect behaviour of `joinGetOrNull` with non-nullable columns. This fixes [#24261](https://github.com/ClickHouse/ClickHouse/issues/24261). [#25288](https://github.com/ClickHouse/ClickHouse/pull/25288) ([Amos Bird](https://github.com/amosbird)).
+* Fix incorrect behaviour and UBSan report in big integers. In previous versions `CAST(1e19 AS UInt128)` returned zero. [#25279](https://github.com/ClickHouse/ClickHouse/pull/25279) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed an error which occurred while inserting a subset of columns using the CSVWithNames format. Fixes [#25129](https://github.com/ClickHouse/ClickHouse/issues/25129). [#25169](https://github.com/ClickHouse/ClickHouse/pull/25169) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Do not use table's projection for `SELECT` with `FINAL`. It is not supported yet. [#25163](https://github.com/ClickHouse/ClickHouse/pull/25163) ([Amos Bird](https://github.com/amosbird)).
+* Fix possible loss of parts after upgrading to 21.5 in case the table used `UUID` in the partition key. (It is not recommended to use `UUID` in the partition key.) Fixes [#25070](https://github.com/ClickHouse/ClickHouse/issues/25070). [#25127](https://github.com/ClickHouse/ClickHouse/pull/25127) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix crash in query with cross join and `joined_subquery_requires_alias = 0`. Fixes [#24011](https://github.com/ClickHouse/ClickHouse/issues/24011). [#25082](https://github.com/ClickHouse/ClickHouse/pull/25082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a bug with constant maps in the mapContains function that led to the error `empty column was returned by function mapContains`. Closes [#25077](https://github.com/ClickHouse/ClickHouse/issues/25077). [#25080](https://github.com/ClickHouse/ClickHouse/pull/25080) ([Kruglov Pavel](https://github.com/Avogar)).
+* Remove the possibility to create tables with columns referencing themselves like `a UInt32 ALIAS a + 1` or `b UInt32 MATERIALIZED b`. Fixes [#24910](https://github.com/ClickHouse/ClickHouse/issues/24910), [#24292](https://github.com/ClickHouse/ClickHouse/issues/24292). [#25059](https://github.com/ClickHouse/ClickHouse/pull/25059) ([alesapin](https://github.com/alesapin)).
+* Fix wrong result when using an aggregate projection with a *non-empty* `GROUP BY` key to execute a query with an *empty* `GROUP BY` key. [#25055](https://github.com/ClickHouse/ClickHouse/pull/25055) ([Amos Bird](https://github.com/amosbird)).
+* Fix serialization of split nested messages in Protobuf format. This PR fixes [#24647](https://github.com/ClickHouse/ClickHouse/issues/24647). [#25000](https://github.com/ClickHouse/ClickHouse/pull/25000) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix limit/offset settings for distributed queries (ignore on the remote nodes). [#24940](https://github.com/ClickHouse/ClickHouse/pull/24940) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible heap-buffer-overflow in `Arrow` format. [#24922](https://github.com/ClickHouse/ClickHouse/pull/24922) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed possible error 'Cannot read from istream at offset 0' when reading a file from DiskS3 (S3 virtual filesystem is an experimental feature under development that should not be used in production). [#24885](https://github.com/ClickHouse/ClickHouse/pull/24885) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Fix "Missing columns" exception when joining a Distributed Materialized View. [#24870](https://github.com/ClickHouse/ClickHouse/pull/24870) ([Azat Khuzhin](https://github.com/azat)).
+* Allow `NULL` values in the PostgreSQL compatibility protocol. Closes [#22622](https://github.com/ClickHouse/ClickHouse/issues/22622). [#24857](https://github.com/ClickHouse/ClickHouse/pull/24857) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a bug when the exception `Mutation was killed` can be thrown to the client on mutation wait while the mutation is not loaded into memory yet. [#24809](https://github.com/ClickHouse/ClickHouse/pull/24809) ([alesapin](https://github.com/alesapin)).
+* Fixed a bug in deserialization of random generator state which might cause some data types such as `AggregateFunction(groupArraySample(N), T)` to behave in a non-deterministic way. [#24538](https://github.com/ClickHouse/ClickHouse/pull/24538) ([tavplubix](https://github.com/tavplubix)).
+* Disallow building uniqXXXXStates of other aggregation states. [#24523](https://github.com/ClickHouse/ClickHouse/pull/24523) ([Raúl Marín](https://github.com/Algunenano)). Then allow it back by actually eliminating the root cause of the related issue. ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix usage of tuples in `CREATE .. AS SELECT` queries. [#24464](https://github.com/ClickHouse/ClickHouse/pull/24464) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix computation of total bytes in the `Buffer` table. In the current ClickHouse version the total_writes.bytes counter decreases too much during the buffer flush. It leads to counter overflow, and totalBytes returns something around 17.44 EB some time after the flush. [#24450](https://github.com/ClickHouse/ClickHouse/pull/24450) ([DimasKovas](https://github.com/DimasKovas)).
+* Fix incorrect information about the monotonicity of the toWeek function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422). This bug was introduced in [#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) and was later exposed by a smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)).
+* When user authentication is managed by LDAP: fixed a potential deadlock that can happen during LDAP role (re)mapping, when an LDAP group is mapped to a nonexistent local role. [#24431](https://github.com/ClickHouse/ClickHouse/pull/24431) ([Denis Glazachev](https://github.com/traceon)).
+* In "multipart/form-data" messages consider the CRLF preceding a boundary as part of it. Fixes [#23905](https://github.com/ClickHouse/ClickHouse/issues/23905). [#24399](https://github.com/ClickHouse/ClickHouse/pull/24399) ([Ivan](https://github.com/abyss7)).
+* Fix drop partition with intersecting fake parts. In rare cases there might be parts with a mutation version greater than the current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)).
+* Fixed a bug in moving a Materialized View from an Ordinary to an Atomic database (`RENAME TABLE` query). Now the inner table is moved to the new database together with the Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([tavplubix](https://github.com/tavplubix)).
+* Allow empty HTTP headers. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)).
+* Correct processing of mutations (ALTER UPDATE/DELETE) in Memory tables. Closes [#24274](https://github.com/ClickHouse/ClickHouse/issues/24274). [#24275](https://github.com/ClickHouse/ClickHouse/pull/24275) ([flynn](https://github.com/ucasfl)).
+* Make the column `LowCardinality` property in JOIN output the same as in the input. Close [#23351](https://github.com/ClickHouse/ClickHouse/issues/23351), close [#20315](https://github.com/ClickHouse/ClickHouse/issues/20315). [#24061](https://github.com/ClickHouse/ClickHouse/pull/24061) ([Vladimir](https://github.com/vdimir)).
+* A fix for Kafka tables. Fix the bug in failover behavior when Engine = Kafka was not able to start consumption if the same consumer had an empty assignment previously. Closes [#21118](https://github.com/ClickHouse/ClickHouse/issues/21118). [#21267](https://github.com/ClickHouse/ClickHouse/pull/21267) ([filimonov](https://github.com/filimonov)).
+
+#### Build/Testing/Packaging Improvement
+
+* Add `darwin-aarch64` (Mac M1 / Apple Silicon) builds in CI [#25560](https://github.com/ClickHouse/ClickHouse/pull/25560) ([Ivan](https://github.com/abyss7)) and put the links to the docs and website ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Adds cross-platform embedding of binary resources into executables. It works on Illumos. [#25146](https://github.com/ClickHouse/ClickHouse/pull/25146) ([bnaecker](https://github.com/bnaecker)).
+* Add join-related options to stress tests to improve fuzzing. [#25200](https://github.com/ClickHouse/ClickHouse/pull/25200) ([Vladimir](https://github.com/vdimir)).
+* Enable build with the S3 module on macOS [#25217](https://github.com/ClickHouse/ClickHouse/issues/25217). [#25218](https://github.com/ClickHouse/ClickHouse/pull/25218) ([kevin wan](https://github.com/MaxWk)).
+* Add integration test cases to cover the JDBC bridge. [#25047](https://github.com/ClickHouse/ClickHouse/pull/25047) ([Zhichun Wu](https://github.com/zhicwu)).
+* Integration tests configuration has special treatment for dictionaries. Removed the remaining manual dictionary setup. [#24728](https://github.com/ClickHouse/ClickHouse/pull/24728) ([Ilya Yatsishin](https://github.com/qoega)).
+* Add libfuzzer tests for the YAMLParser class. [#24480](https://github.com/ClickHouse/ClickHouse/pull/24480) ([BoloniniD](https://github.com/BoloniniD)).
+* Ubuntu 20.04 is now used to run integration tests; the docker-compose version used to run integration tests is updated to 1.28.2. Environment variables now take effect on docker-compose. Reworked test_dictionaries_all_layouts_separate_sources to allow parallel runs. [#20393](https://github.com/ClickHouse/ClickHouse/pull/20393) ([Ilya Yatsishin](https://github.com/qoega)).
+* Fix TOCTOU error in the installation script. [#25277](https://github.com/ClickHouse/ClickHouse/pull/25277) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+
+
+### ClickHouse release 21.6, 2021-06-05
+
+#### Upgrade Notes
+
+* The `zstd` compression library is updated to v1.5.0. You may get messages about "checksum does not match" in replication. These messages are expected due to the update of the compression algorithm, and you can ignore them. These messages are informational and do not indicate any kind of undesired behaviour.
+* The setting `compile_expressions` is enabled by default. Although it has been heavily tested on a variety of scenarios, if you find some undesired behaviour on your servers, you can try turning this setting off.
+* Values of `UUID` type cannot be compared with an integer. For example, instead of writing `uuid != 0` type `uuid != '00000000-0000-0000-0000-000000000000'`.
+
+#### New Feature
+
+* Add Postgres-like cast operator (`::`). E.g.: `[1, 2]::Array(UInt8)`, `0.1::Decimal(4, 4)`, `number::UInt16`. [#23871](https://github.com/ClickHouse/ClickHouse/pull/23871) ([Anton Popov](https://github.com/CurtizJ)).
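+
+For example, the new operator can be used anywhere a `CAST` is accepted:
+
+```sql
+SELECT [1, 2]::Array(UInt8) AS arr,
+       0.1::Decimal(4, 4)   AS dec,
+       number::UInt16       AS n
+FROM numbers(3);
+```
+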
+* Make big integers production ready. Add support for `UInt128` data type. Fix known issues with the `Decimal256` data type. Support big integers in dictionaries. Support `gcd`/`lcm` functions for big integers. Support big integers in array search and conditional functions. Support `LowCardinality(UUID)`. Support big integers in `generateRandom` table function and `clickhouse-obfuscator`. Fix error with returning `UUID` from scalar subqueries. This fixes [#7834](https://github.com/ClickHouse/ClickHouse/issues/7834). This fixes [#23936](https://github.com/ClickHouse/ClickHouse/issues/23936). This fixes [#4176](https://github.com/ClickHouse/ClickHouse/issues/4176). This fixes [#24018](https://github.com/ClickHouse/ClickHouse/issues/24018). Backward incompatible change: values of `UUID` type cannot be compared with an integer. For example, instead of writing `uuid != 0` type `uuid != '00000000-0000-0000-0000-000000000000'`. [#23631](https://github.com/ClickHouse/ClickHouse/pull/23631) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support `Array` data type for inserting and selecting data in `Arrow`, `Parquet` and `ORC` formats. [#21770](https://github.com/ClickHouse/ClickHouse/pull/21770) ([taylor12805](https://github.com/taylor12805)).
+* Implement table comments. Closes [#23225](https://github.com/ClickHouse/ClickHouse/issues/23225). [#23548](https://github.com/ClickHouse/ClickHouse/pull/23548) ([flynn](https://github.com/ucasFL)).
+* Support creating dictionaries with DDL queries in `clickhouse-local`. Closes [#22354](https://github.com/ClickHouse/ClickHouse/issues/22354). Added support for `DETACH DICTIONARY PERMANENTLY`. Added support for `EXCHANGE DICTIONARIES` for the `Atomic` database engine. Added support for moving dictionaries between databases using `RENAME DICTIONARY`. [#23436](https://github.com/ClickHouse/ClickHouse/pull/23436) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add aggregate function `uniqTheta` to support [Theta Sketch](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html) in ClickHouse. [#23894](https://github.com/ClickHouse/ClickHouse/pull/23894). [#22609](https://github.com/ClickHouse/ClickHouse/pull/22609) ([Ping Yu](https://github.com/pingyu)).
+* Add function `splitByRegexp`. [#24077](https://github.com/ClickHouse/ClickHouse/pull/24077) ([abel-cheng](https://github.com/abel-cheng)).
+* Add function `arrayProduct` which accepts an array as the parameter and returns the product of all elements in the array. Closes [#21613](https://github.com/ClickHouse/ClickHouse/issues/21613). [#23782](https://github.com/ClickHouse/ClickHouse/pull/23782) ([Maksim Kita](https://github.com/kitaisreal)).
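+
+Both of the new functions above can be tried directly:
+
+```sql
+SELECT splitByRegexp('[0-9]+', 'a12bc23de345f') AS parts, -- ['a', 'bc', 'de', 'f']
+       arrayProduct([1, 2, 3, 4]) AS product;             -- 24
+```
+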
+* Add `thread_name` column in `system.stack_trace`. This closes [#23256](https://github.com/ClickHouse/ClickHouse/issues/23256). [#24124](https://github.com/ClickHouse/ClickHouse/pull/24124) ([abel-cheng](https://github.com/abel-cheng)).
+* If `insert_null_as_default` = 1, insert default values instead of NULL in `INSERT ... SELECT` and `INSERT ... SELECT ... UNION ALL ...` queries. Closes [#22832](https://github.com/ClickHouse/ClickHouse/issues/22832). [#23524](https://github.com/ClickHouse/ClickHouse/pull/23524) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add support for progress indication in `clickhouse-local` with the `--progress` option. [#23196](https://github.com/ClickHouse/ClickHouse/pull/23196) ([Egor Savin](https://github.com/Amesaru)).
+* Add support for HTTP compression (determined by the `Content-Encoding` HTTP header) in the `http` dictionary source. This fixes [#8912](https://github.com/ClickHouse/ClickHouse/issues/8912). [#23946](https://github.com/ClickHouse/ClickHouse/pull/23946) ([FArthur-cmd](https://github.com/FArthur-cmd)).
+* Added `SYSTEM QUERY RELOAD MODEL`, `SYSTEM QUERY RELOAD MODELS`. Closes [#18722](https://github.com/ClickHouse/ClickHouse/issues/18722). [#23182](https://github.com/ClickHouse/ClickHouse/pull/23182) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add setting `json` (boolean, 0 by default) for `EXPLAIN PLAN` query. When enabled, the query output will be a single `JSON` row. It is recommended to use the `TSVRaw` format to avoid unnecessary escaping. [#23082](https://github.com/ClickHouse/ClickHouse/pull/23082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add setting `indexes` (boolean, disabled by default) to `EXPLAIN PIPELINE` query. When enabled, shows used indexes, number of filtered parts and granules for every index applied. Supported for `MergeTree*` tables. [#22352](https://github.com/ClickHouse/ClickHouse/pull/22352) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* LDAP: implemented user DN detection functionality to use when mapping Active Directory groups to ClickHouse roles. [#22228](https://github.com/ClickHouse/ClickHouse/pull/22228) ([Denis Glazachev](https://github.com/traceon)).
+* New aggregate function `deltaSumTimestamp` for summing the difference between consecutive rows while maintaining ordering during merge by storing timestamps. [#21888](https://github.com/ClickHouse/ClickHouse/pull/21888) ([Russ Frank](https://github.com/rf)).
+* Added a less secure IMDS credentials provider for S3 which works correctly under Docker. [#21852](https://github.com/ClickHouse/ClickHouse/pull/21852) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add back the `indexHint` function. This is for [#21238](https://github.com/ClickHouse/ClickHouse/issues/21238). This reverts [#9542](https://github.com/ClickHouse/ClickHouse/pull/9542). This fixes [#9540](https://github.com/ClickHouse/ClickHouse/issues/9540). [#21304](https://github.com/ClickHouse/ClickHouse/pull/21304) ([Amos Bird](https://github.com/amosbird)).
+
+#### Experimental Feature
+
+* Add `PROJECTION` support for `MergeTree*` tables. [#20202](https://github.com/ClickHouse/ClickHouse/pull/20202) ([Amos Bird](https://github.com/amosbird)).
+
+#### Performance Improvement
+
+* Enable the `compile_expressions` setting by default. When this setting is enabled, compositions of simple functions and operators will be compiled to native code with LLVM at runtime. [#8482](https://github.com/ClickHouse/ClickHouse/pull/8482) ([Maksim Kita](https://github.com/kitaisreal), [alexey-milovidov](https://github.com/alexey-milovidov)). Note: if you run into trouble, turn this option off.
+* Update the `re2` library. Performance of regular expression matching is improved. Also this PR adds compatibility with gcc-11. [#24196](https://github.com/ClickHouse/ClickHouse/pull/24196) ([Raúl Marín](https://github.com/Algunenano)).
+* ORC input format now reads stripe by stripe instead of reading the entire table into memory at once, which saves memory when the file size is huge. [#23102](https://github.com/ClickHouse/ClickHouse/pull/23102) ([Chao Ma](https://github.com/godliness)).
+* Fusion of aggregate functions `sum`, `count` and `avg` in a query into a single aggregate function. The optimization is controlled with the `optimize_fuse_sum_count_avg` setting. This is implemented with a new aggregate function `sumCount`. This function returns a tuple of two fields: `sum` and `count`. [#21337](https://github.com/ClickHouse/ClickHouse/pull/21337) ([hexiaoting](https://github.com/hexiaoting)).
+* Update `zstd` to v1.5.0. The performance of compression is improved by a single-digit percentage. [#24135](https://github.com/ClickHouse/ClickHouse/pull/24135) ([Raúl Marín](https://github.com/Algunenano)). Note: you may get messages about "checksum does not match" in replication. These messages are expected due to the update of the compression algorithm, and you can ignore them.
+* Improved performance of `Buffer` tables: do not acquire lock for total_bytes/total_rows for the `Buffer` engine. [#24066](https://github.com/ClickHouse/ClickHouse/pull/24066) ([Azat Khuzhin](https://github.com/azat)).
+* Preallocate support for hashed/sparse_hashed dictionaries has been brought back. [#23979](https://github.com/ClickHouse/ClickHouse/pull/23979) ([Azat Khuzhin](https://github.com/azat)).
+* Enable `async_socket_for_remote` by default (lowers the number of threads when querying Distributed tables with large fanout). [#23683](https://github.com/ClickHouse/ClickHouse/pull/23683) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
+#### Improvement
+
+* Add `_partition_value` virtual column to the MergeTree table family. It can be used to prune partitions in a deterministic way. It's needed to implement a partition matcher for mutations. [#23673](https://github.com/ClickHouse/ClickHouse/pull/23673) ([Amos Bird](https://github.com/amosbird)).
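+
+A minimal sketch of the new virtual column, using a hypothetical table `part_demo`:
+
+```sql
+CREATE TABLE part_demo (d Date, x UInt32)
+ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY x;
+
+INSERT INTO part_demo VALUES ('2021-06-01', 1);
+
+-- returns the partition key value as a tuple, e.g. (202106)
+SELECT DISTINCT _partition_value FROM part_demo;
+```
+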
+* Added `region` parameter for S3 storage and disk. [#23846](https://github.com/ClickHouse/ClickHouse/pull/23846) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Allow configuring different log levels for different logging channels. Closes [#19569](https://github.com/ClickHouse/ClickHouse/issues/19569). [#23857](https://github.com/ClickHouse/ClickHouse/pull/23857) ([filimonov](https://github.com/filimonov)).
+* Keep the default timezone on `DateTime` operations if it was not provided explicitly. For example, if you add one second to a value of `DateTime` type without timezone it will remain `DateTime` without timezone. In previous versions the value of the default timezone was placed into the returned data type explicitly, so it became `DateTime('something')`. This closes [#4854](https://github.com/ClickHouse/ClickHouse/issues/4854). [#23392](https://github.com/ClickHouse/ClickHouse/pull/23392) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow user to specify an empty string instead of a database name for `MySQL` storage. The default database will be used for queries. In previous versions it was working for SELECT queries, and now support for INSERT queries is also added. This closes [#19281](https://github.com/ClickHouse/ClickHouse/issues/19281). This can be useful working with `Sphinx` or other MySQL-compatible foreign databases. [#23319](https://github.com/ClickHouse/ClickHouse/pull/23319) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in the implementation of an earlier version of the algorithm was fixed. [#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Function `now64` now supports an optional timezone argument. [#24091](https://github.com/ClickHouse/ClickHouse/pull/24091) ([Vasily Nemkov](https://github.com/Enmk)).
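+
+For example:
+
+```sql
+SELECT now64(3), now64(3, 'UTC');
+```
+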
+* Fix the case when a progress bar in interactive mode in clickhouse-client that appears in the middle of the data may overwrite some parts of visible data in the terminal. This closes [#19283](https://github.com/ClickHouse/ClickHouse/issues/19283). [#23050](https://github.com/ClickHouse/ClickHouse/pull/23050) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix crash when memory allocation fails in simdjson. https://github.com/simdjson/simdjson/pull/1567. Mark as improvement because it's a very rare bug. [#24147](https://github.com/ClickHouse/ClickHouse/pull/24147) ([Amos Bird](https://github.com/amosbird)).
+* Preserve dictionaries until storage shutdown (this will avoid possible `external dictionary 'DICT' not found` errors at server shutdown during the final flush of the `Buffer` engine). [#24068](https://github.com/ClickHouse/ClickHouse/pull/24068) ([Azat Khuzhin](https://github.com/azat)).
+* Flush `Buffer` tables before shutting down tables (within one database), to avoid discarding blocks due to the underlying table having been already detached (and the `Destination table default.a_data_01870 doesn't exist. Block of data is discarded` error in the log). [#24067](https://github.com/ClickHouse/ClickHouse/pull/24067) ([Azat Khuzhin](https://github.com/azat)).
+* Now `prefer_column_name_to_alias = 1` will also favor column names for `group by`, `having` and `order by`. This fixes [#23882](https://github.com/ClickHouse/ClickHouse/issues/23882). [#24022](https://github.com/ClickHouse/ClickHouse/pull/24022) ([Amos Bird](https://github.com/amosbird)).
+* Add support for `ORDER BY WITH FILL` with `DateTime64`. [#24016](https://github.com/ClickHouse/ClickHouse/pull/24016) ([kevin wan](https://github.com/MaxWk)).
+* Enable `DateTime64` to be a version column in `ReplacingMergeTree`. [#23992](https://github.com/ClickHouse/ClickHouse/pull/23992) ([kevin wan](https://github.com/MaxWk)).
+* Log information about OS name, kernel version and CPU architecture on server startup. [#23988](https://github.com/ClickHouse/ClickHouse/pull/23988) ([Azat Khuzhin](https://github.com/azat)).
+* Support specifying a table schema for the `postgresql` dictionary source. Closes [#23958](https://github.com/ClickHouse/ClickHouse/issues/23958). [#23980](https://github.com/ClickHouse/ClickHouse/pull/23980) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add hints for names of `Enum` elements (suggest names in case of typos). Closes [#17112](https://github.com/ClickHouse/ClickHouse/issues/17112). [#23919](https://github.com/ClickHouse/ClickHouse/pull/23919) ([flynn](https://github.com/ucasFL)).
+* Measure found rate (the percentage for which the value was found) for dictionaries (see `found_rate` in `system.dictionaries`). [#23916](https://github.com/ClickHouse/ClickHouse/pull/23916) ([Azat Khuzhin](https://github.com/azat)).
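+
+The new metric can be inspected directly:
+
+```sql
+SELECT name, found_rate FROM system.dictionaries;
+```
+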
+* Allow to add specific queue settings via the table setting `rabbitmq_queue_settings_list`. (Closes [#23737](https://github.com/ClickHouse/ClickHouse/issues/23737) and [#23918](https://github.com/ClickHouse/ClickHouse/issues/23918)). Allow user to control all RabbitMQ setup: if the table setting `rabbitmq_queue_consume` is set to `1`, the RabbitMQ table engine will only connect to the specified queue and will not perform any RabbitMQ consumer-side setup like declaring exchanges, queues, bindings. (Closes [#21757](https://github.com/ClickHouse/ClickHouse/issues/21757)). Add proper cleanup when a RabbitMQ table is dropped: delete the queues which the table has declared, and all bound exchanges if they were created by the table. [#23887](https://github.com/ClickHouse/ClickHouse/pull/23887) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add `broken_data_files`/`broken_data_compressed_bytes` into `system.distribution_queue`. Add a metric for the number of files for asynchronous insertion into Distributed tables that have been marked as broken (`BrokenDistributedFilesToInsert`). [#23885](https://github.com/ClickHouse/ClickHouse/pull/23885) ([Azat Khuzhin](https://github.com/azat)).
+* Querying `system.tables` does not go to ZooKeeper anymore. [#23793](https://github.com/ClickHouse/ClickHouse/pull/23793) ([Fuwang Hu](https://github.com/fuwhu)).
+* Respect `lock_acquire_timeout_for_background_operations` for `OPTIMIZE` queries. [#23623](https://github.com/ClickHouse/ClickHouse/pull/23623) ([Azat Khuzhin](https://github.com/azat)).
+* Possibility to change `S3` disk settings at runtime via the new `SYSTEM RESTART DISK` SQL command. [#23429](https://github.com/ClickHouse/ClickHouse/pull/23429) ([Pavel Kovalenko](https://github.com/Jokser)).
+* If user applied a misconfiguration by mistakenly setting `max_distributed_connections` to the value zero, every query to a `Distributed` table will throw an exception with a message containing "logical error". But it's really an expected behaviour, not a logical error, so the exception message was slightly incorrect. It also triggered checks in our CI environment that ensure that no logical errors ever happen. Instead we will treat `max_distributed_connections` misconfigured to zero as the minimum possible value (one). [#23348](https://github.com/ClickHouse/ClickHouse/pull/23348) ([Azat Khuzhin](https://github.com/azat)).
+* Disable `min_bytes_to_use_mmap_io` by default. [#23322](https://github.com/ClickHouse/ClickHouse/pull/23322) ([Azat Khuzhin](https://github.com/azat)).
+* Support `LowCardinality` nullability with `join_use_nulls`, close [#15101](https://github.com/ClickHouse/ClickHouse/issues/15101). [#23237](https://github.com/ClickHouse/ClickHouse/pull/23237) ([vdimir](https://github.com/vdimir)).
+* Added possibility to restore `MergeTree` parts to the `detached` directory for the `S3` disk. [#23112](https://github.com/ClickHouse/ClickHouse/pull/23112) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Retries on HTTP connection drops in S3. [#22988](https://github.com/ClickHouse/ClickHouse/pull/22988) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add settings `external_storage_max_read_rows` and `external_storage_max_read_bytes` for the MySQL table engine, dictionary source and MaterializeMySQL minor data fetches. [#22697](https://github.com/ClickHouse/ClickHouse/pull/22697) ([TCeason](https://github.com/TCeason)).
+* `MaterializeMySQL` (experimental feature): Previously, MySQL 5.7.9 was not supported due to SQL incompatibility. Now MySQL parameter verification is left to MaterializeMySQL. [#23413](https://github.com/ClickHouse/ClickHouse/pull/23413) ([TCeason](https://github.com/TCeason)).
+* Enable reading of subcolumns for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix usage of tuples in `CREATE .. AS SELECT` queries. [#24464](https://github.com/ClickHouse/ClickHouse/pull/24464) ([Anton Popov](https://github.com/CurtizJ)).
+* Support for `Parquet` format in `Kafka` tables. [#23412](https://github.com/ClickHouse/ClickHouse/pull/23412) ([Chao Ma](https://github.com/godliness)).
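+
+A minimal sketch of a `Kafka` table consuming `Parquet` messages (broker, topic and consumer group names are hypothetical):
+
+```sql
+CREATE TABLE kafka_parquet_queue (id UInt64, value String)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'localhost:9092',
+         kafka_topic_list = 'parquet_topic',
+         kafka_group_name = 'clickhouse_consumer',
+         kafka_format = 'Parquet';
+```
+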
+
+#### Bug Fix
+
+* Use the old modulo function version when used in partition key and primary key. Closes [#23508](https://github.com/ClickHouse/ClickHouse/issues/23508). [#24157](https://github.com/ClickHouse/ClickHouse/pull/24157) ([Kseniia Sumarokova](https://github.com/kssenii)). It was a source of backward incompatibility in previous releases.
+* Fixed the behavior when the query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` is being processed infinitely. This was detected on a server with an extremely small amount of RAM. [#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix incorrect monotonicity of the `toWeek` function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422). This bug was introduced in [#5212](https://github.com/ClickHouse/ClickHouse/pull/5212), and was exposed later by a smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)).
+* Fix drop partition with intersecting fake parts. In rare cases there might be parts with a mutation version greater than the current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)).
+* Fixed a bug in moving a Materialized View from an Ordinary to an Atomic database (`RENAME TABLE` query). Now the inner table is moved to the new database together with the Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([tavplubix](https://github.com/tavplubix)).
+* Allow empty HTTP headers in client requests. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)).
+* Set `max_threads = 1` to fix mutation failures of `Memory` tables. Closes [#24274](https://github.com/ClickHouse/ClickHouse/issues/24274). [#24275](https://github.com/ClickHouse/ClickHouse/pull/24275) ([flynn](https://github.com/ucasFL)).
+* Fix typo in implementation of `Memory` tables; this bug was introduced in [#15127](https://github.com/ClickHouse/ClickHouse/issues/15127). Closes [#24192](https://github.com/ClickHouse/ClickHouse/issues/24192). [#24193](https://github.com/ClickHouse/ClickHouse/pull/24193) ([张中南](https://github.com/plugine)).
+* Fix abnormal server termination due to `HDFS` becoming inaccessible during query execution. Closes [#24117](https://github.com/ClickHouse/ClickHouse/issues/24117). [#24191](https://github.com/ClickHouse/ClickHouse/pull/24191) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix crash on updating of a `Nested` column with a const condition. [#24183](https://github.com/ClickHouse/ClickHouse/pull/24183) ([hexiaoting](https://github.com/hexiaoting)).
+* Fix race condition which could happen in RBAC under a heavy load. This PR fixes [#24090](https://github.com/ClickHouse/ClickHouse/issues/24090) and [#24134](https://github.com/ClickHouse/ClickHouse/issues/24134). [#24176](https://github.com/ClickHouse/ClickHouse/pull/24176) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix a rare bug that could lead to a partially initialized table that can serve write requests (INSERT, ALTER and so on). Now such tables will be in readonly mode. [#24122](https://github.com/ClickHouse/ClickHouse/pull/24122) ([alesapin](https://github.com/alesapin)).
+* Fix an issue: `EXPLAIN PIPELINE` with `SELECT xxx FINAL` showed a wrong pipeline. ([hexiaoting](https://github.com/hexiaoting)).
+* Fixed using a const `DateTime` value vs a `DateTime64` column in `WHERE`. [#24100](https://github.com/ClickHouse/ClickHouse/pull/24100) ([Vasily Nemkov](https://github.com/Enmk)).
+* Fix crash in merge JOIN, closes [#24010](https://github.com/ClickHouse/ClickHouse/issues/24010). [#24013](https://github.com/ClickHouse/ClickHouse/pull/24013) ([vdimir](https://github.com/vdimir)).
+* Some `ALTER PARTITION` queries might cause `Part A intersects previous part B` and `Unexpected merged part C intersecting drop range D` errors in the replication queue. It's fixed. Fixes [#23296](https://github.com/ClickHouse/ClickHouse/issues/23296). [#23997](https://github.com/ClickHouse/ClickHouse/pull/23997) ([tavplubix](https://github.com/tavplubix)).
+* Fix SIGSEGV for external GROUP BY and overflow row (i.e. queries like `SELECT FROM GROUP BY WITH TOTALS SETTINGS max_bytes_before_external_group_by>0, max_rows_to_group_by>0, group_by_overflow_mode='any', totals_mode='before_having'`). [#23962](https://github.com/ClickHouse/ClickHouse/pull/23962) ([Azat Khuzhin](https://github.com/azat)).
+* Fix keys metrics accounting for a `CACHE` dictionary with duplicates in the source (leads to `DictCacheKeysRequestedMiss` overflows). [#23929](https://github.com/ClickHouse/ClickHouse/pull/23929) ([Azat Khuzhin](https://github.com/azat)).
+* Fix implementation of the connection pool of the `PostgreSQL` engine. Closes [#23897](https://github.com/ClickHouse/ClickHouse/issues/23897). [#23909](https://github.com/ClickHouse/ClickHouse/pull/23909) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix `distributed_group_by_no_merge = 2` with `GROUP BY` and aggregate function wrapped into regular function (had been broken in [#23546](https://github.com/ClickHouse/ClickHouse/issues/23546)). Throw an exception in case someone tries to use `distributed_group_by_no_merge = 2` with window functions. Disable `optimize_distributed_group_by_sharding_key` for queries with window functions. [#23906](https://github.com/ClickHouse/ClickHouse/pull/23906) ([Azat Khuzhin](https://github.com/azat)).
+* A fix for the `s3` table function: better handling of HTTP errors. Response bodies of HTTP errors were being ignored earlier. [#23844](https://github.com/ClickHouse/ClickHouse/pull/23844) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* A fix for the `s3` table function: better handling of URIs. Fixed an incompatibility with URLs containing the `+` symbol; data with such keys could not be read previously. [#23822](https://github.com/ClickHouse/ClickHouse/pull/23822) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Fix error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` and `use_hedged_requests`. Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix `CLEAR COLUMN` not working when the column is referenced by a materialized view. Close [#23764](https://github.com/ClickHouse/ClickHouse/issues/23764). [#23781](https://github.com/ClickHouse/ClickHouse/pull/23781) ([flynn](https://github.com/ucasFL)).
+* Fix heap-use-after-free when reading from HDFS if the `Values` format is used. [#23761](https://github.com/ClickHouse/ClickHouse/pull/23761) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Avoid possible "Cannot schedule a task" error (in case some exception has occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed a bug in recovery of a stale `ReplicatedMergeTree` replica. Some metadata updates could be ignored by a stale replica if an `ALTER` query was executed during downtime of the replica. [#23742](https://github.com/ClickHouse/ClickHouse/pull/23742) ([tavplubix](https://github.com/tavplubix)).
+* Fix a bug with `Join` and `WITH TOTALS`, close [#17718](https://github.com/ClickHouse/ClickHouse/issues/17718). [#23549](https://github.com/ClickHouse/ClickHouse/pull/23549) ([vdimir](https://github.com/vdimir)).
+* Fix possible `Block structure mismatch` error for queries with `UNION` which could possibly happen after the filter-pushdown optimization. Fixes [#23029](https://github.com/ClickHouse/ClickHouse/issues/23029). [#23359](https://github.com/ClickHouse/ClickHouse/pull/23359) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add type conversion when the setting `optimize_skip_unused_shards_rewrite_in` is enabled. This fixes an MSan report. [#23219](https://github.com/ClickHouse/ClickHouse/pull/23219) ([Azat Khuzhin](https://github.com/azat)).
+* Add a missing check when updating nested subcolumns, close issue: [#22353](https://github.com/ClickHouse/ClickHouse/issues/22353). [#22503](https://github.com/ClickHouse/ClickHouse/pull/22503) ([hexiaoting](https://github.com/hexiaoting)).
+
+#### Build/Testing/Packaging Improvement
+
+* Support building on Illumos. [#24144](https://github.com/ClickHouse/ClickHouse/pull/24144). Adds support for building on Solaris-derived operating systems. [#23746](https://github.com/ClickHouse/ClickHouse/pull/23746) ([bnaecker](https://github.com/bnaecker)).
+* Add more benchmarks for hash tables, including the Swiss Table from Google (that appeared to be slower than the ClickHouse hash map in our specific usage scenario). [#24111](https://github.com/ClickHouse/ClickHouse/pull/24111) ([Maksim Kita](https://github.com/kitaisreal)).
+* Update librdkafka from 1.6.0-RC3 to 1.6.1. [#23874](https://github.com/ClickHouse/ClickHouse/pull/23874) ([filimonov](https://github.com/filimonov)).
+* Always enable `asynchronous-unwind-tables` explicitly. It may fix the query profiler on AArch64. [#23602](https://github.com/ClickHouse/ClickHouse/pull/23602) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Avoid possible build dependency on locale and filesystem order. This allows reproducible builds. [#23600](https://github.com/ClickHouse/ClickHouse/pull/23600) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove a source of nondeterminism from build. Now builds at different points in time will produce byte-identical binaries. Partially addressed [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#23559](https://github.com/ClickHouse/ClickHouse/pull/23559) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add a simple tool for benchmarking (Zoo)Keeper. [#23038](https://github.com/ClickHouse/ClickHouse/pull/23038) ([alesapin](https://github.com/alesapin)).
+
+
+### ClickHouse release 21.5, 2021-05-20
+
+#### Backward Incompatible Change
+
+* Change comparison of integers and floating point numbers when the integer is not exactly representable in the floating point data type. In the new version the comparison will return false as the rounding error will occur. Example: `9223372036854775808.0 != 9223372036854775808`, because the number `9223372036854775808` is not representable as a floating point number exactly (and `9223372036854775808.0` is rounded to `9223372036854776000.0`). But in the previous version the comparison returned that the numbers are equal, because if the floating point number `9223372036854776000.0` gets converted back to UInt64, it will yield `9223372036854775808`. For reference, the Python programming language also treats these numbers as equal. But this behaviour was dependent on the CPU model (different results on AMD64 and AArch64 for some out-of-range numbers), so we make the comparison more precise. It will treat int and float numbers as equal only if the int is represented in the floating point type exactly. [#22595](https://github.com/ClickHouse/ClickHouse/pull/22595) ([alexey-milovidov](https://github.com/alexey-milovidov)).
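+
+For example, under the new rules:
+
+```sql
+SELECT 9223372036854775808 = 9223372036854775808.0; -- now 0: the float rounds to 9223372036854776000.0
+SELECT 1 = 1.0;                                     -- still 1: exactly representable
+```
+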
+* Remove support for `argMin` and `argMax` for single `Tuple` argument. The code was not memory-safe. The feature was added by mistake and it is confusing for people. These functions can be reintroduced under different names later. This fixes [#22384](https://github.com/ClickHouse/ClickHouse/issues/22384) and reverts [#17359](https://github.com/ClickHouse/ClickHouse/issues/17359). [#23393](https://github.com/ClickHouse/ClickHouse/pull/23393) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+
+#### New Feature
+
+* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity. Improved performance of `dictGetHierarchy`, `dictIsIn` functions. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added function `dictGetOrNull`. It works like `dictGet`, but returns `NULL` in case the key was not found in the dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added a table function `s3Cluster`, which allows to process files from `s3` in parallel on every node of a specified cluster. [#22012](https://github.com/ClickHouse/ClickHouse/pull/22012) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
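+
+A minimal sketch of the new table function (cluster and bucket names are hypothetical; the trailing arguments are the input format and structure):
+
+```sql
+SELECT count()
+FROM s3Cluster('my_cluster',
+               'https://my-bucket.s3.amazonaws.com/data/*.csv',
+               'CSV',
+               'id UInt64, value String');
+```
+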
+* Added support for replicas and shards in MySQL/PostgreSQL table engine / table function. You can write `SELECT * FROM mysql('host{1,2}-{1|2}', ...)`. Closes [#20969](https://github.com/ClickHouse/ClickHouse/issues/20969). [#22217](https://github.com/ClickHouse/ClickHouse/pull/22217) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added `ALTER TABLE ... FETCH PART ...` query. It's similar to `FETCH PARTITION`, but fetches only one part. [#22706](https://github.com/ClickHouse/ClickHouse/pull/22706) ([turbo jason](https://github.com/songenjie)).
+* Added a setting `max_distributed_depth` that limits the depth of recursive queries to `Distributed` tables. Closes [#20229](https://github.com/ClickHouse/ClickHouse/issues/20229). [#21942](https://github.com/ClickHouse/ClickHouse/pull/21942) ([flynn](https://github.com/ucasFL)).
+
+#### Performance Improvement
+
+* Improved performance of `intDiv` by dynamic dispatch for AVX2. This closes [#22314](https://github.com/ClickHouse/ClickHouse/issues/22314). [#23000](https://github.com/ClickHouse/ClickHouse/pull/23000) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improved performance of reading from the `ArrowStream` input format for sources other than a local file (e.g. URL). [#22673](https://github.com/ClickHouse/ClickHouse/pull/22673) ([nvartolomei](https://github.com/nvartolomei)).
+* Disabled compression by default when interacting with localhost (with clickhouse-client or server to server with distributed queries) via native protocol. It may improve performance of some import/export operations. This closes [#22234](https://github.com/ClickHouse/ClickHouse/issues/22234). [#22237](https://github.com/ClickHouse/ClickHouse/pull/22237) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Exclude values that do not belong to the shard from the right part of the IN section for distributed queries (under `optimize_skip_unused_shards_rewrite_in`, enabled by default, since it still requires `optimize_skip_unused_shards`). [#21511](https://github.com/ClickHouse/ClickHouse/pull/21511) ([Azat Khuzhin](https://github.com/azat)).
+* Improved performance of reading a subset of columns with File-like table engine and column-oriented format like Parquet, Arrow or ORC. This closes [#20129](https://github.com/ClickHouse/ClickHouse/issues/20129). [#21302](https://github.com/ClickHouse/ClickHouse/pull/21302) ([keenwolf](https://github.com/keen-wolf)).
+* Allow to move more conditions to `PREWHERE` as it was before version 21.1 (adjustment of internal heuristics). An insufficient number of moved conditions could lead to worse performance. [#23397](https://github.com/ClickHouse/ClickHouse/pull/23397) ([Anton Popov](https://github.com/CurtizJ)).
+* Improved performance of ODBC connections and fixed all the outstanding issues from the backlog. Using the `nanodbc` library instead of `Poco::ODBC`. Closes [#9678](https://github.com/ClickHouse/ClickHouse/issues/9678). Add support for DateTime64 and Decimal* for the ODBC table engine. Closes [#21961](https://github.com/ClickHouse/ClickHouse/issues/21961). Fixed issue with Cyrillic text being truncated. Closes [#16246](https://github.com/ClickHouse/ClickHouse/issues/16246). Added connection pools for the ODBC bridge. [#21972](https://github.com/ClickHouse/ClickHouse/pull/21972) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Improvement
+
+* Increase `max_uri_size` (the maximum size of URL in HTTP interface) to 1 MiB by default. This closes [#21197](https://github.com/ClickHouse/ClickHouse/issues/21197). [#22997](https://github.com/ClickHouse/ClickHouse/pull/22997) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Set `background_fetches_pool_size` to `8`, which is better for production usage with frequent small insertions or a slow ZooKeeper cluster. [#22945](https://github.com/ClickHouse/ClickHouse/pull/22945) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Added `initial_array_size` and `max_array_size` options for `FlatDictionary`. [#22521](https://github.com/ClickHouse/ClickHouse/pull/22521) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add new setting `non_replicated_deduplication_window` for non-replicated MergeTree inserts deduplication. [#22514](https://github.com/ClickHouse/ClickHouse/pull/22514) ([alesapin](https://github.com/alesapin)).
+* Update paths to the `CatBoost` model configs in config reloading. [#22434](https://github.com/ClickHouse/ClickHouse/pull/22434) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added `Decimal256` type support in dictionaries. `Decimal256` is an experimental feature. Closes [#20979](https://github.com/ClickHouse/ClickHouse/issues/20979). [#22960](https://github.com/ClickHouse/ClickHouse/pull/22960) ([Maksim Kita](https://github.com/kitaisreal)).
+* Enabled `async_socket_for_remote` by default (using a smaller number of OS threads for distributed queries). [#23683](https://github.com/ClickHouse/ClickHouse/pull/23683) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in the implementation of an earlier version of the algorithm was fixed. [#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Make function name `unhex` case insensitive for compatibility with MySQL. [#23229](https://github.com/ClickHouse/ClickHouse/pull/23229) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Implement functions `arrayHasAny`, `arrayHasAll`, `has`, `indexOf`, `countEqual` for the generic case when types of array elements are different. In previous versions the functions `arrayHasAny`, `arrayHasAll` returned false and `has`, `indexOf`, `countEqual` threw an exception. Also add support for `Decimal` and big integer types in functions `has` and similar. This closes [#20272](https://github.com/ClickHouse/ClickHouse/issues/20272). [#23044](https://github.com/ClickHouse/ClickHouse/pull/23044) ([alexey-milovidov](https://github.com/alexey-milovidov)).
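+
+A rough sketch of the new behaviour (the exact results are assumptions based on the description above):
+
+```sql
+SELECT has([1, 2, 3], 'abc'); -- now returns 0 instead of throwing an exception
+SELECT has([toDecimal32(1.10, 2), toDecimal32(2.20, 2)], toDecimal32(2.20, 2)); -- Decimal is now supported
+```
+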
+* Raised the threshold on the max number of matches in the result of the function `extractAllGroupsHorizontal`. [#23036](https://github.com/ClickHouse/ClickHouse/pull/23036) ([Vasily Nemkov](https://github.com/Enmk)).
+* Do not perform `optimize_skip_unused_shards` for a cluster with one node. [#22999](https://github.com/ClickHouse/ClickHouse/pull/22999) ([Azat Khuzhin](https://github.com/azat)).
+* Added ability to run clickhouse-keeper (an experimental drop-in replacement for ZooKeeper) with SSL. The config setting `keeper_server.tcp_port_secure` can be used for secure interaction between client and keeper-server. `keeper_server.raft_configuration.secure` can be used to enable internal secure communication between nodes. [#22992](https://github.com/ClickHouse/ClickHouse/pull/22992) ([alesapin](https://github.com/alesapin)).
+* Added ability to flush the buffer only in background for `Buffer` tables. [#22986](https://github.com/ClickHouse/ClickHouse/pull/22986) ([Azat Khuzhin](https://github.com/azat)).
+* When selecting from a MergeTree table with NULL in the WHERE condition, in rare cases, an exception was thrown. This closes [#20019](https://github.com/ClickHouse/ClickHouse/issues/20019). [#22978](https://github.com/ClickHouse/ClickHouse/pull/22978) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix error handling in the Poco HTTP Client for AWS. [#22973](https://github.com/ClickHouse/ClickHouse/pull/22973) ([kreuzerkrieg](https://github.com/kreuzerkrieg)).
+* Respect `max_part_removal_threads` for `ReplicatedMergeTree`. [#22971](https://github.com/ClickHouse/ClickHouse/pull/22971) ([Azat Khuzhin](https://github.com/azat)).
+* Fix an obscure corner case of MergeTree settings `inactive_parts_to_throw_insert = 0` with `inactive_parts_to_delay_insert > 0`. [#22947](https://github.com/ClickHouse/ClickHouse/pull/22947) ([Azat Khuzhin](https://github.com/azat)).
+* `dateDiff` now works with `DateTime64` arguments (even for values outside of the `DateTime` range). [#22931](https://github.com/ClickHouse/ClickHouse/pull/22931) ([Vasily Nemkov](https://github.com/Enmk)).
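+
+For example, dates before 1970 are outside the `DateTime` range but now work:
+
+```sql
+SELECT dateDiff('year',
+                toDateTime64('1900-01-01 00:00:00', 3, 'UTC'),
+                toDateTime64('2021-05-20 00:00:00', 3, 'UTC')); -- 121
+```
+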
[#22582](https://github.com/ClickHouse/ClickHouse/pull/22582) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Correctly check the structure of blocks of data that are sent by `Distributed` tables. [#22325](https://github.com/ClickHouse/ClickHouse/pull/22325) ([Azat Khuzhin](https://github.com/azat)).
+* Allow publishing Kafka errors to a virtual column of the Kafka engine, controlled by the `kafka_handle_error_mode` setting. [#21850](https://github.com/ClickHouse/ClickHouse/pull/21850) ([fastio](https://github.com/fastio)).
+* Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes [#21383](https://github.com/ClickHouse/ClickHouse/issues/21383). [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)).
+* Add `clickhouse-library-bridge` for the library dictionary source. Closes [#9502](https://github.com/ClickHouse/ClickHouse/issues/9502). [#21509](https://github.com/ClickHouse/ClickHouse/pull/21509) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Forbid dropping a column if it's referenced by a materialized view. Closes [#21164](https://github.com/ClickHouse/ClickHouse/issues/21164). [#21303](https://github.com/ClickHouse/ClickHouse/pull/21303) ([flynn](https://github.com/ucasFL)).
+* Support dynamic interserver credentials (rotating credentials without downtime). [#14113](https://github.com/ClickHouse/ClickHouse/pull/14113) ([johnskopis](https://github.com/johnskopis)).
+* Add support for Kafka storage with `Arrow` and `ArrowStream` format messages. [#23415](https://github.com/ClickHouse/ClickHouse/pull/23415) ([Chao Ma](https://github.com/godliness)).
+* Fixed a missing semicolon in an exception message. The user may find this exception message unpleasant to read. [#23208](https://github.com/ClickHouse/ClickHouse/pull/23208) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed missing whitespace in some exception messages about the `LowCardinality` type. [#23207](https://github.com/ClickHouse/ClickHouse/pull/23207) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Some values were formatted with centered alignment in table cells in `Markdown` format. Not anymore. [#23096](https://github.com/ClickHouse/ClickHouse/pull/23096) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove non-essential details from suggestions in clickhouse-client. This closes [#22158](https://github.com/ClickHouse/ClickHouse/issues/22158). [#23040](https://github.com/ClickHouse/ClickHouse/pull/23040) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Correct calculation of the `bytes_allocated` field in `system.dictionaries` for `sparse_hashed` dictionaries. [#22867](https://github.com/ClickHouse/ClickHouse/pull/22867) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed approximate total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed the case when it was possible to configure a dictionary with a ClickHouse source pointing to itself, which led to an infinite loop. Closes [#14314](https://github.com/ClickHouse/ClickHouse/issues/14314). [#22479](https://github.com/ClickHouse/ClickHouse/pull/22479) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### Bug Fix
+
+* Multiple fixes for hedged requests. Fixed an error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` when the setting `use_hedged_requests` is enabled.
Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). Fixed a race condition in hedged connections which led to a crash. This fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)). Fixed a possible crash when an `unknown packet` was received from a remote query (with `async_socket_for_remote` enabled). Fixes [#21167](https://github.com/ClickHouse/ClickHouse/issues/21167). [#23309](https://github.com/ClickHouse/ClickHouse/pull/23309) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed the behavior when disabling the `input_format_with_names_use_header` setting discards all the input with `CSVWithNames` format. This fixes [#22406](https://github.com/ClickHouse/ClickHouse/issues/22406). [#23202](https://github.com/ClickHouse/ClickHouse/pull/23202) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fixed a remote JDBC bridge timeout connection issue. Closes [#9609](https://github.com/ClickHouse/ClickHouse/issues/9609). [#23771](https://github.com/ClickHouse/ClickHouse/pull/23771) ([Maksim Kita](https://github.com/kitaisreal), [alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix the logic of the initial load of `complex_key_hashed` dictionaries if `update_field` is specified. Closes [#23800](https://github.com/ClickHouse/ClickHouse/issues/23800). [#23824](https://github.com/ClickHouse/ClickHouse/pull/23824) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed a crash when `PREWHERE` and a row policy filter are both in effect with an empty result. [#23763](https://github.com/ClickHouse/ClickHouse/pull/23763) ([Amos Bird](https://github.com/amosbird)).
+* Avoid a possible "Cannot schedule a task" error (in case some exception occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)).
+* Added an exception for the case when both samples contain completely the same values in the aggregate function `mannWhitneyUTest`. This fixes [#23646](https://github.com/ClickHouse/ClickHouse/issues/23646). [#23654](https://github.com/ClickHouse/ClickHouse/pull/23654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fixed a server fault when inserting data through HTTP caused an exception. This fixes [#23512](https://github.com/ClickHouse/ClickHouse/issues/23512). [#23643](https://github.com/ClickHouse/ClickHouse/pull/23643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fixed misinterpretation of some `LIKE` expressions with escape sequences. [#23610](https://github.com/ClickHouse/ClickHouse/pull/23610) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed the restart / stop command hanging. Closes [#20214](https://github.com/ClickHouse/ClickHouse/issues/20214). [#23552](https://github.com/ClickHouse/ClickHouse/pull/23552) ([filimonov](https://github.com/filimonov)).
+* Fixed the `COLUMNS` matcher in case of multiple JOINs in a SELECT query. Closes [#22736](https://github.com/ClickHouse/ClickHouse/issues/22736). [#23501](https://github.com/ClickHouse/ClickHouse/pull/23501) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed a crash when modifying a column's default value when the column itself is used as `ReplacingMergeTree`'s parameter. [#23483](https://github.com/ClickHouse/ClickHouse/pull/23483) ([hexiaoting](https://github.com/hexiaoting)).
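+  A minimal sketch of the scenario from the entry above (hypothetical table and column names):
+
+  ```sql
+  CREATE TABLE t (key UInt64, ver UInt32 DEFAULT 1)
+  ENGINE = ReplacingMergeTree(ver) ORDER BY key;
+
+  ALTER TABLE t MODIFY COLUMN ver UInt32 DEFAULT 2;  -- previously could crash the server
+  ```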
+* Fixed corner cases in vertical merges with `ReplacingMergeTree`. In rare cases they could lead to failures of merges with exceptions like `Incomplete granules are not allowed while blocks are granules size`. [#23459](https://github.com/ClickHouse/ClickHouse/pull/23459) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed a bug that did not allow casting an empty array literal to an array with more than one dimension, e.g. `CAST([] AS Array(Array(String)))` (see the example below). Closes [#14476](https://github.com/ClickHouse/ClickHouse/issues/14476). [#23456](https://github.com/ClickHouse/ClickHouse/pull/23456) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed a bug when the `deltaSum` aggregate function produced an incorrect result after resetting the counter. [#23437](https://github.com/ClickHouse/ClickHouse/pull/23437) ([Russ Frank](https://github.com/rf)).
+* Fixed the `Cannot unlink file` error on unsuccessful creation of a ReplicatedMergeTree table with a multi-disk configuration. This closes [#21755](https://github.com/ClickHouse/ClickHouse/issues/21755). [#23433](https://github.com/ClickHouse/ClickHouse/pull/23433) ([tavplubix](https://github.com/tavplubix)).
+* Fixed incompatible constant expression generation during partition pruning based on virtual columns. This fixes https://github.com/ClickHouse/ClickHouse/pull/21401#discussion_r611888913. [#23366](https://github.com/ClickHouse/ClickHouse/pull/23366) ([Amos Bird](https://github.com/amosbird)).
+* Fixed a crash when the setting `join_algorithm` is set to `auto` and a Join is performed with a Dictionary. Closes [#23002](https://github.com/ClickHouse/ClickHouse/issues/23002). [#23312](https://github.com/ClickHouse/ClickHouse/pull/23312) ([Vladimir](https://github.com/vdimir)).
+* Don't relax NOT conditions during partition pruning. This fixes [#23305](https://github.com/ClickHouse/ClickHouse/issues/23305) and [#21539](https://github.com/ClickHouse/ClickHouse/issues/21539). [#23310](https://github.com/ClickHouse/ClickHouse/pull/23310) ([Amos Bird](https://github.com/amosbird)).
+* Fixed a very rare race condition on background cleanup of old blocks. It might cause a block not to be deduplicated if it's too close to the end of the deduplication window. [#23301](https://github.com/ClickHouse/ClickHouse/pull/23301) ([tavplubix](https://github.com/tavplubix)).
+* Fixed a very rare (distributed) race condition between creation and removal of ReplicatedMergeTree tables. It might cause exceptions like `node doesn't exist` on an attempt to create a replicated table. Fixes [#21419](https://github.com/ClickHouse/ClickHouse/issues/21419). [#23294](https://github.com/ClickHouse/ClickHouse/pull/23294) ([tavplubix](https://github.com/tavplubix)).
+* Fixed simple-key dictionaries created via DDL when the primary key is not the first attribute. Fixes [#23236](https://github.com/ClickHouse/ClickHouse/issues/23236). [#23262](https://github.com/ClickHouse/ClickHouse/pull/23262) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed reading from ODBC when there are many long column names in a table. Closes [#8853](https://github.com/ClickHouse/ClickHouse/issues/8853). [#23215](https://github.com/ClickHouse/ClickHouse/pull/23215) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* MaterializeMySQL (experimental feature): fixed the `Not found column` error when selecting from `MaterializeMySQL` with a condition on a key column. Fixes [#22432](https://github.com/ClickHouse/ClickHouse/issues/22432). [#23200](https://github.com/ClickHouse/ClickHouse/pull/23200) ([tavplubix](https://github.com/tavplubix)).
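+  For the `CAST` entry above, the now-working conversion (taken directly from the entry) is simply:
+
+  ```sql
+  SELECT CAST([] AS Array(Array(String))) AS nested;  -- returns [] instead of failing
+  ```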
+* Corrected alias handling when a subquery is optimized to a constant. Fixes [#22924](https://github.com/ClickHouse/ClickHouse/issues/22924). Fixes [#10401](https://github.com/ClickHouse/ClickHouse/issues/10401). [#23191](https://github.com/ClickHouse/ClickHouse/pull/23191) ([Maksim Kita](https://github.com/kitaisreal)).
+* The server might fail to start if the `data_type_default_nullable` setting is enabled in the default profile; it's fixed. Fixes [#22573](https://github.com/ClickHouse/ClickHouse/issues/22573). [#23185](https://github.com/ClickHouse/ClickHouse/pull/23185) ([tavplubix](https://github.com/tavplubix)).
+* Fixed a crash on shutdown which happened because of wrong accounting of current connections. [#23154](https://github.com/ClickHouse/ClickHouse/pull/23154) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fixed the `Table .inner_id... doesn't exist` error when selecting from a Materialized View after detaching it from an Atomic database and attaching it back. [#23047](https://github.com/ClickHouse/ClickHouse/pull/23047) ([tavplubix](https://github.com/tavplubix)).
+* Fixed the error `Cannot find column in ActionsDAG result` which may happen if a subquery uses `untuple`. Fixes [#22290](https://github.com/ClickHouse/ClickHouse/issues/22290). [#22991](https://github.com/ClickHouse/ClickHouse/pull/22991) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix usage of constant columns of type `Map` with nullable values. [#22939](https://github.com/ClickHouse/ClickHouse/pull/22939) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed `formatDateTime()` on `DateTime64` and the `%C` format specifier. Fixed `toDateTime64()` for large values and non-zero scale (see the example below). [#22937](https://github.com/ClickHouse/ClickHouse/pull/22937) ([Vasily Nemkov](https://github.com/Enmk)).
+* Fixed a crash when using `mannWhitneyUTest` and `rankCorr` with window functions. This fixes [#22728](https://github.com/ClickHouse/ClickHouse/issues/22728). [#22876](https://github.com/ClickHouse/ClickHouse/pull/22876) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* LIVE VIEW (experimental feature): fixed possible hanging in concurrent DROP/CREATE of TEMPORARY LIVE VIEW in `TemporaryLiveViewCleaner`, [see](https://gist.github.com/vzakaznikov/0c03195960fc86b56bfe2bc73a90019e). [#22858](https://github.com/ClickHouse/ClickHouse/pull/22858) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fixed pushdown of `HAVING` in the case when the filter column is used in aggregation. [#22763](https://github.com/ClickHouse/ClickHouse/pull/22763) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed possible hangs in ZooKeeper requests in case of an OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed waiting for mutations on several replicas for ReplicatedMergeTree table engines. Previously, a mutation/ALTER query might finish before the mutation was actually executed on other replicas. [#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)).
+* Fixed an exception for `Log` engine with nested types without columns in the `SELECT` clause. [#22654](https://github.com/ClickHouse/ClickHouse/pull/22654) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed unlimited wait for auxiliary AWS requests. [#22594](https://github.com/ClickHouse/ClickHouse/pull/22594) ([Vladimir Chebotarev](https://github.com/excitoon)).
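+  A sketch for the `formatDateTime`/`toDateTime64` entry above (illustrative values):
+
+  ```sql
+  SELECT formatDateTime(toDateTime64('2021-04-01 12:34:56.789', 3), '%C') AS century;  -- '%C' now works for DateTime64
+  SELECT toDateTime64(9234567890.123, 3) AS far_future;  -- a large value with non-zero scale
+  ```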
+* Fixed a crash when the client closes the connection very early [#22579](https://github.com/ClickHouse/ClickHouse/issues/22579). [#22591](https://github.com/ClickHouse/ClickHouse/pull/22591) ([nvartolomei](https://github.com/nvartolomei)).
+* `Map` data type (experimental feature): fixed incorrect formatting of the function `map` in distributed queries. [#22588](https://github.com/ClickHouse/ClickHouse/pull/22588) ([foolchi](https://github.com/foolchi)).
+* Fixed deserialization of an empty string without a newline at the end of TSV format. This closes [#20244](https://github.com/ClickHouse/ClickHouse/issues/20244). Possible workaround without a version update: set `input_format_null_as_default` to zero. It was zero in old versions. [#22527](https://github.com/ClickHouse/ClickHouse/pull/22527) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed a wrong cast of a column of `LowCardinality` type in the Merge Join algorithm. Closes [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), closes [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). [#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir](https://github.com/vdimir)).
+* A buffer overflow (on read) was possible in the `tokenbf_v1` full-text index. The excessive bytes are not used, but the read operation may lead to a crash in rare cases. This closes [#19233](https://github.com/ClickHouse/ClickHouse/issues/19233). [#22421](https://github.com/ClickHouse/ClickHouse/pull/22421) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Do not limit HTTP chunk size. Fixes [#21907](https://github.com/ClickHouse/ClickHouse/issues/21907). [#22322](https://github.com/ClickHouse/ClickHouse/pull/22322) ([Ivan](https://github.com/abyss7)).
+* Fixed a bug which led to under-aggregation of data when `optimize_aggregation_in_order` is enabled and the table has many parts. Slightly improved performance of aggregation with `optimize_aggregation_in_order` enabled. [#21889](https://github.com/ClickHouse/ClickHouse/pull/21889) ([Anton Popov](https://github.com/CurtizJ)).
+* Check if the table function `view` is used as a column. This complements #20350. [#21465](https://github.com/ClickHouse/ClickHouse/pull/21465) ([Amos Bird](https://github.com/amosbird)).
+* Fix the "unknown column" error for tables with `Merge` engine in queries with `JOIN` and aggregation. Closes [#18368](https://github.com/ClickHouse/ClickHouse/issues/18368), closes [#22226](https://github.com/ClickHouse/ClickHouse/issues/22226). [#21370](https://github.com/ClickHouse/ClickHouse/pull/21370) ([Vladimir](https://github.com/vdimir)).
+* Fixed name clashes in pushdown optimization. It caused incorrect `WHERE` filtration after FULL JOIN. Closes [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)).
+* Fixed a very rare bug when a quorum insert with `quorum_parallel=1` is not really "quorum" because of deduplication. [#18215](https://github.com/ClickHouse/ClickHouse/pull/18215) ([filimonov](https://github.com/filimonov) - reported, [alesapin](https://github.com/alesapin) - fixed).
+
+#### Build/Testing/Packaging Improvement
+
+* Run stateless tests in parallel in CI. [#22300](https://github.com/ClickHouse/ClickHouse/pull/22300) ([alesapin](https://github.com/alesapin)).
+* Simplify Debian packages. This fixes [#21698](https://github.com/ClickHouse/ClickHouse/issues/21698).
[#22976](https://github.com/ClickHouse/ClickHouse/pull/22976) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Added support for ClickHouse build on Apple M1. [#21639](https://github.com/ClickHouse/ClickHouse/pull/21639) ([changvvb](https://github.com/changvvb)).
+* Fixed the ClickHouse Keeper build for macOS. [#22860](https://github.com/ClickHouse/ClickHouse/pull/22860) ([alesapin](https://github.com/alesapin)).
+* Fixed some tests on the AArch64 platform. [#22596](https://github.com/ClickHouse/ClickHouse/pull/22596) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Added function alignment for possibly better performance. [#21431](https://github.com/ClickHouse/ClickHouse/pull/21431) ([Danila Kutenin](https://github.com/danlark1)).
+* Adjust some tests to output identical results on amd64 and aarch64 (qemu). The result depended on implementation-specific CPU behaviour. [#22590](https://github.com/ClickHouse/ClickHouse/pull/22590) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow query profiling only on x86_64. See [#15174](https://github.com/ClickHouse/ClickHouse/issues/15174#issuecomment-812954965) and [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638#issuecomment-703805337). This closes [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638). [#22580](https://github.com/ClickHouse/ClickHouse/pull/22580) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow building with unbundled xz (lzma) using the `USE_INTERNAL_XZ_LIBRARY=OFF` CMake option. [#22571](https://github.com/ClickHouse/ClickHouse/pull/22571) ([Kfir Itzhak](https://github.com/mastertheknife)).
+* Enable bundled `openldap` on `ppc64le`. [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)).
+* Disable incompatible (typically platform-specific) libraries on `ppc64le`. [#22475](https://github.com/ClickHouse/ClickHouse/pull/22475) ([Kfir Itzhak](https://github.com/mastertheknife)).
+* Added a Jepsen test in CI for ClickHouse Keeper. [#22373](https://github.com/ClickHouse/ClickHouse/pull/22373) ([alesapin](https://github.com/alesapin)).
+* Build `jemalloc` with support for [heap profiling](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Heap-Profiling). [#22834](https://github.com/ClickHouse/ClickHouse/pull/22834) ([nvartolomei](https://github.com/nvartolomei)).
+* Avoid UB in `*Log` engines for rwlock unlock due to unlock from another thread. [#22583](https://github.com/ClickHouse/ClickHouse/pull/22583) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed UB by unlocking the rwlock of TinyLog from the same thread. [#22560](https://github.com/ClickHouse/ClickHouse/pull/22560) ([Azat Khuzhin](https://github.com/azat)).
+
+
+## ClickHouse release 21.4
+
+### ClickHouse release 21.4.1 2021-04-12
+
+#### Backward Incompatible Change
+
+* The `toStartOfInterval` function will align hour intervals to midnight (in previous versions they were aligned to the start of the Unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: `00:00:00..10:59:59`, `11:00:00..21:59:59` and `22:00:00..23:59:59`. This behaviour is more suited for practical needs. This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* `Age` and `Precision` in graphite rollup configs should increase from retention to retention.
Now this is checked, and a wrong config raises an exception. [#21496](https://github.com/ClickHouse/ClickHouse/pull/21496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning a wrong result for 3+ level domains present in the custom top-level domain list. For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)).
+* Column `keys` in table `system.dictionaries` was replaced by columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` of the `system.dictionaries` table do not require the dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)).
+* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched for and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**:
+  * `ATTACH PART[ITION]` queries may not work during cluster upgrade.
+  * It's not possible to roll back to an older ClickHouse version after executing an `ALTER ... ATTACH` query in the new version, as the old servers would fail to pass the `ATTACH_PART` entry in the replicated log.
+* In this version, an empty `remote_url_allow_hosts` element will block all access to remote hosts, while in previous versions it did nothing. If you want to keep the old behaviour and you have an empty `remote_url_allow_hosts` element in your configuration file, remove it. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)).
+
+
+#### New Feature
+
+* Extended the range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around the zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)). Not all time and date functions work for the extended range of dates.
+* Added support of Kerberos authentication for preconfigured users and HTTP requests (GSS-SPNEGO). [#14995](https://github.com/ClickHouse/ClickHouse/pull/14995) ([Denis Glazachev](https://github.com/traceon)).
+* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. It is needed for better compatibility with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)).
+* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. A zero `level` value is equivalent to infinity.
Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added `executable_pool` dictionary source. Closes [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added table function `dictionary`. It works the same way as the `Dictionary` engine. Closes [#21560](https://github.com/ClickHouse/ClickHouse/issues/21560). [#21910](https://github.com/ClickHouse/ClickHouse/pull/21910) ([Maksim Kita](https://github.com/kitaisreal)).
+* Support `Nullable` type for `PolygonDictionary` attributes. [#21890](https://github.com/ClickHouse/ClickHouse/pull/21890) ([Maksim Kita](https://github.com/kitaisreal)).
+* Functions `dictGet`, `dictHas` use the current database name if it is not specified for dictionaries created with DDL. Closes [#21632](https://github.com/ClickHouse/ClickHouse/issues/21632). [#21859](https://github.com/ClickHouse/ClickHouse/pull/21859) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added function `dictGetOrNull`. It works like `dictGet`, but returns `NULL` if the key was not found in the dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added async update in `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for the `Nullable` type in `Cache`, `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for fetching multiple attributes with the `dictGet`, `dictGetOrDefault` functions. Fixes [#21517](https://github.com/ClickHouse/ClickHouse/issues/21517). [#20595](https://github.com/ClickHouse/ClickHouse/pull/20595) ([Maksim Kita](https://github.com/kitaisreal)).
+* Support the `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add function `timezoneOf` that returns the timezone name of the `DateTime` or `DateTime64` data types. This does not close [#9959](https://github.com/ClickHouse/ClickHouse/issues/9959). Fix inconsistencies in function names: add aliases `timezone` and `timeZone` as well as `toTimezone` and `toTimeZone` and `timezoneOf` and `timeZoneOf`. [#22001](https://github.com/ClickHouse/ClickHouse/pull/22001) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add new optional clause `GRANTEES` for `CREATE/ALTER USER` commands. It specifies users or roles which are allowed to receive grants from this user, on the condition that this user also has all required access granted with grant option. By default `GRANTEES ANY` is used, which means a user with grant option can grant to anyone. Syntax: `CREATE USER ... GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]`. [#21641](https://github.com/ClickHouse/ClickHouse/pull/21641) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add new column `slowdowns_count` to `system.clusters`. When using hedged requests, it shows how many times we switched to another replica because this replica was responding slowly. Also shows the actual value of `errors_count` in `system.clusters`. [#21480](https://github.com/ClickHouse/ClickHouse/pull/21480) ([Kruglov Pavel](https://github.com/Avogar)).
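+  A query sketch for inspecting the new column from the entry above:
+
+  ```sql
+  SELECT cluster, host_name, errors_count, slowdowns_count
+  FROM system.clusters;
+  ```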
+* Add a `_partition_id` virtual column for `MergeTree*` engines. Allow pruning partitions by `_partition_id`. Add a `partitionID()` function to calculate the partition id string. [#21401](https://github.com/ClickHouse/ClickHouse/pull/21401) ([Amos Bird](https://github.com/amosbird)).
+* Add function `isIPAddressInRange` to test if an IPv4 or IPv6 address is contained in a given CIDR network prefix. [#21329](https://github.com/ClickHouse/ClickHouse/pull/21329) ([PHO](https://github.com/depressed-pho)).
+* Added a new SQL command `ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'`. This command is needed to properly remove frozen partitions from all disks. [#21142](https://github.com/ClickHouse/ClickHouse/pull/21142) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Support implicit key type conversion for JOIN. [#19885](https://github.com/ClickHouse/ClickHouse/pull/19885) ([Vladimir](https://github.com/vdimir)).
+
+#### Experimental Feature
+
+* Support the `RANGE OFFSET` frame (for window functions) for floating point types. Implement `lagInFrame`/`leadInFrame` window functions, which are analogous to `lag`/`lead`, but respect the window frame. They are identical when the frame is `between unbounded preceding and unbounded following`. This closes [#5485](https://github.com/ClickHouse/ClickHouse/issues/5485). [#21895](https://github.com/ClickHouse/ClickHouse/pull/21895) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Zero-copy replication for `ReplicatedMergeTree` over S3 storage. [#16240](https://github.com/ClickHouse/ClickHouse/pull/16240) ([ianton-ru](https://github.com/ianton-ru)).
+* Added the possibility to migrate an existing S3 disk to the schema with backup-restore capabilities. [#22070](https://github.com/ClickHouse/ClickHouse/pull/22070) ([Pavel Kovalenko](https://github.com/Jokser)).
+
+#### Performance Improvement
+
+* Supported parallel formatting in `clickhouse-local` and everywhere else. [#21630](https://github.com/ClickHouse/ClickHouse/pull/21630) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Support parallel parsing for `CSVWithNames` and `TSVWithNames` formats. This closes [#21085](https://github.com/ClickHouse/ClickHouse/issues/21085). [#21149](https://github.com/ClickHouse/ClickHouse/pull/21149) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Enable reading with mmap IO for file ranges from 64 MiB (the setting `min_bytes_to_use_mmap_io`). It may lead to a moderate performance improvement. [#22326](https://github.com/ClickHouse/ClickHouse/pull/22326) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add a cache for files read with the `min_bytes_to_use_mmap_io` setting. It gives a significant (2x and more) performance improvement when the value of the setting is small, by avoiding frequent mmap/munmap calls and the consequent page faults. Note that mmap IO has major drawbacks that make it less reliable in production (e.g. hangs or SIGBUS on faulty disks; less controllable memory usage). Nevertheless it is good in benchmarks. [#22206](https://github.com/ClickHouse/ClickHouse/pull/22206) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Avoid unnecessary data copies when using codec `NONE`. Please note that codec `NONE` is mostly useless - it's recommended to always use compression (`LZ4` is the default). Despite the common belief, disabling compression may not improve performance (the opposite effect is possible). The `NONE` codec is useful in some cases: - when data is incompressible; - for synthetic benchmarks.
[#22145](https://github.com/ClickHouse/ClickHouse/pull/22145) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Faster `GROUP BY` with small `max_rows_to_group_by` and `group_by_overflow_mode='any'`. [#21856](https://github.com/ClickHouse/ClickHouse/pull/21856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Optimize performance of queries like `SELECT ... FINAL ... WHERE`. Now in queries with `FINAL` it's allowed to move columns which are in the sorting key to `PREWHERE`. [#21830](https://github.com/ClickHouse/ClickHouse/pull/21830) ([foolchi](https://github.com/foolchi)).
+* Improved performance by replacing `memcpy` with another implementation. This closes [#18583](https://github.com/ClickHouse/ClickHouse/issues/18583). [#21520](https://github.com/ClickHouse/ClickHouse/pull/21520) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of aggregation in order of the sorting key (with the setting `optimize_aggregation_in_order` enabled). [#19401](https://github.com/ClickHouse/ClickHouse/pull/19401) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Improvement
+
+* Add a connection pool for the PostgreSQL table/database engine and dictionary source. Should fix [#21444](https://github.com/ClickHouse/ClickHouse/issues/21444). [#21839](https://github.com/ClickHouse/ClickHouse/pull/21839) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support non-default table schema for the postgres storage/table-function. Closes [#21701](https://github.com/ClickHouse/ClickHouse/issues/21701). [#21711](https://github.com/ClickHouse/ClickHouse/pull/21711) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support replica priorities for the postgres dictionary source. [#21710](https://github.com/ClickHouse/ClickHouse/pull/21710) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Introduce a new merge tree setting `min_bytes_to_rebalance_partition_over_jbod` which allows assigning new parts to different disks of a JBOD volume in a balanced way (see the sketch below). [#16481](https://github.com/ClickHouse/ClickHouse/pull/16481) ([Amos Bird](https://github.com/amosbird)).
+* Added `Grant`, `Revoke` and `System` values of the `query_kind` column for corresponding queries in `system.query_log`. [#21102](https://github.com/ClickHouse/ClickHouse/pull/21102) ([Vasily Nemkov](https://github.com/Enmk)).
+* Allow customizing timeouts for HTTP connections used for replication independently from other HTTP timeouts. [#20088](https://github.com/ClickHouse/ClickHouse/pull/20088) ([nvartolomei](https://github.com/nvartolomei)).
+* Better exception message in the client in case of an exception while the server is writing blocks. In previous versions the client might get a misleading message like `Data compressed with different methods`. [#22427](https://github.com/ClickHouse/ClickHouse/pull/22427) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix the error `Directory tmp_fetch_XXX already exists` which could happen after a failed part fetch. Delete the temporary fetch directory if it already exists. Fixes [#14197](https://github.com/ClickHouse/ClickHouse/issues/14197). [#22411](https://github.com/ClickHouse/ClickHouse/pull/22411) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix the MSan report for function `range` with a `UInt256` argument (support for large integers is experimental). This closes [#22157](https://github.com/ClickHouse/ClickHouse/issues/22157). [#22387](https://github.com/ClickHouse/ClickHouse/pull/22387) ([alexey-milovidov](https://github.com/alexey-milovidov)).
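+  A configuration sketch for the `min_bytes_to_rebalance_partition_over_jbod` entry above (hypothetical table and names):
+
+  ```sql
+  CREATE TABLE jbod_table (key UInt64, value String)
+  ENGINE = MergeTree ORDER BY key
+  SETTINGS min_bytes_to_rebalance_partition_over_jbod = 1073741824;  -- 1 GiB
+  ```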
+* Add a `current_database` column to the `system.processes` table. It contains the current database of the query. [#22365](https://github.com/ClickHouse/ClickHouse/pull/22365) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Add case-insensitive history search/navigation and subword movement features to `clickhouse-client`. [#22105](https://github.com/ClickHouse/ClickHouse/pull/22105) ([Amos Bird](https://github.com/amosbird)).
+* If a tuple of NULLs, e.g. `(NULL, NULL)`, is on the left hand side of the `IN` operator with tuples of non-NULLs on the right hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))`, return 0 instead of throwing an exception about incompatible types (see the example after this list). The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Update the used version of simdjson to 0.9.1. This fixes [#21984](https://github.com/ClickHouse/ClickHouse/issues/21984). [#22057](https://github.com/ClickHouse/ClickHouse/pull/22057) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Added case-insensitive aliases for the `CONNECTION_ID()` and `VERSION()` functions. This fixes [#22028](https://github.com/ClickHouse/ClickHouse/issues/22028). [#22042](https://github.com/ClickHouse/ClickHouse/pull/22042) ([Eugene Klimov](https://github.com/Slach)).
+* Add option `strict_increase` to the `windowFunnel` function to calculate each event once (resolves [#21835](https://github.com/ClickHouse/ClickHouse/issues/21835)). [#22025](https://github.com/ClickHouse/ClickHouse/pull/22025) ([Vladimir](https://github.com/vdimir)).
+* If the partition key of a `MergeTree` table does not include `Date` or `DateTime` columns but includes exactly one `DateTime64` column, expose its values in the `min_time` and `max_time` columns in the `system.parts` and `system.parts_columns` tables. Add `min_time` and `max_time` columns to the `system.parts_columns` table (this was inconsistent with the `system.parts` table). This closes [#18244](https://github.com/ClickHouse/ClickHouse/issues/18244). [#22011](https://github.com/ClickHouse/ClickHouse/pull/22011) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Supported the `replication_alter_partitions_sync=1` setting in `clickhouse-copier` for moving partitions from the helper table to the destination. Decreased default timeouts. Fixes [#21911](https://github.com/ClickHouse/ClickHouse/issues/21911). [#21912](https://github.com/ClickHouse/ClickHouse/pull/21912) ([turbo jason](https://github.com/songenjie)).
+* Show the path to the data directory of `EmbeddedRocksDB` tables in system tables. [#21903](https://github.com/ClickHouse/ClickHouse/pull/21903) ([tavplubix](https://github.com/tavplubix)).
+* Add profile event `HedgedRequestsChangeReplica`, change the read data timeout from seconds to milliseconds. [#21886](https://github.com/ClickHouse/ClickHouse/pull/21886) ([Kruglov Pavel](https://github.com/Avogar)).
+* DiskS3 (experimental feature under development). Fixed a bug that made it impossible to move a directory if the destination is not empty and a cache disk is used. [#21837](https://github.com/ClickHouse/ClickHouse/pull/21837) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Better formatting for `Array` and `Map` data types in the Web UI.
[#21798](https://github.com/ClickHouse/ClickHouse/pull/21798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Update clusters only if their configurations were updated. [#21685](https://github.com/ClickHouse/ClickHouse/pull/21685) ([Kruglov Pavel](https://github.com/Avogar)).
+* Propagate query and session settings for distributed DDL queries. Set `distributed_ddl_entry_format_version` to 2 to enable this. Added the `distributed_ddl_output_mode` setting. Supported modes: `none`, `throw` (default), `null_status_on_timeout` and `never_throw`. Miscellaneous fixes and improvements for the `Replicated` database engine. [#21535](https://github.com/ClickHouse/ClickHouse/pull/21535) ([tavplubix](https://github.com/tavplubix)).
+* If `PODArray` was instantiated with an element size that is neither a fraction nor a multiple of 16, a buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add `last_error_time`/`last_error_message`/`last_error_stacktrace`/`remote` columns for `system.errors`. [#21529](https://github.com/ClickHouse/ClickHouse/pull/21529) ([Azat Khuzhin](https://github.com/azat)).
+* Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes #21383. [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)).
+* Add setting `optimize_skip_unused_shards_limit` to limit the number of sharding key values for `optimize_skip_unused_shards`. [#21512](https://github.com/ClickHouse/ClickHouse/pull/21512) ([Azat Khuzhin](https://github.com/azat)).
+* Improve `clickhouse-format` to not throw an exception when there are extra spaces or a comment after the last query, and to throw an exception early with a readable message when formatting `ASTInsertQuery` with data. [#21311](https://github.com/ClickHouse/ClickHouse/pull/21311) ([flynn](https://github.com/ucasFL)).
+* Improve support of integer keys in the data type `Map`. [#21157](https://github.com/ClickHouse/ClickHouse/pull/21157) ([Anton Popov](https://github.com/CurtizJ)).
+* MaterializeMySQL: attempt to reconnect to MySQL if the connection is lost. [#20961](https://github.com/ClickHouse/ClickHouse/pull/20961) ([Håvard Kvålen](https://github.com/havardk)).
+* Support more cases to rewrite `CROSS JOIN` to `INNER JOIN`. [#20392](https://github.com/ClickHouse/ClickHouse/pull/20392) ([Vladimir](https://github.com/vdimir)).
+* Do not create empty parts on INSERT when the `optimize_on_insert` setting is enabled. Fixes [#20304](https://github.com/ClickHouse/ClickHouse/issues/20304). [#20387](https://github.com/ClickHouse/ClickHouse/pull/20387) ([Kruglov Pavel](https://github.com/Avogar)).
+* `MaterializeMySQL`: add a minmax skipping index for the `_version` column. [#20382](https://github.com/ClickHouse/ClickHouse/pull/20382) ([Stig Bakken](https://github.com/stigsb)).
+* Add option `--backslash` for `clickhouse-format`, which can add a backslash at the end of each line of the formatted query. [#21494](https://github.com/ClickHouse/ClickHouse/pull/21494) ([flynn](https://github.com/ucasFL)).
+* Now ClickHouse will not throw a `LOGICAL_ERROR` exception when trying to mutate an already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)).
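+Example for the NULL-tuple `IN` entry in the list above (the query is taken directly from the entry):
+
+```sql
+SELECT (NULL, NULL) IN ((0, 0), (3, 1));  -- now returns 0 instead of throwing an exception
+```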
+
+#### Bug Fix
+
+* Remove the socket from epoll before cancelling the packet receiver in `HedgedConnections` to prevent a possible race. Fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the result set contained very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix an exception which may happen when a `SELECT` has a constant `WHERE` condition and the source table has columns whose names are digits (see the sketch below). [#22270](https://github.com/ClickHouse/ClickHouse/pull/22270) ([LiuNeng](https://github.com/liuneng1994)).
+* Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)).
+* Fix an uncaught exception in `InterserverIOHTTPHandler`. [#22146](https://github.com/ClickHouse/ClickHouse/pull/22146) ([Azat Khuzhin](https://github.com/azat)).
+* Fix the docker entrypoint in case `http_port` is not in the config. [#22132](https://github.com/ClickHouse/ClickHouse/pull/22132) ([Ewout](https://github.com/devwout)).
+* Fix the error `Invalid number of rows in Chunk` in `JOIN` with `TOTALS` and `arrayJoin`. Closes [#19303](https://github.com/ClickHouse/ClickHouse/issues/19303). [#22129](https://github.com/ClickHouse/ClickHouse/pull/22129) ([Vladimir](https://github.com/vdimir)).
+* Fixed the name of the background thread pool used to poll messages from Kafka. With the broken thread pool, the Kafka engine would not consume messages from the message queue. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)).
+* Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)).
+* Disable `async_socket_for_remote`/`use_hedged_requests` for buggy Linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)).
+* Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)).
+* The function `decrypt` was lacking a check for the minimal size of data encrypted in `AEAD` mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* In rare cases, a merge for `CollapsingMergeTree` could create a granule with `index_granularity + 1` rows. Because of this, an internal check added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3) could fail with the error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
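+  A sketch of the digit-named-column case from the entry above (hypothetical table):
+
+  ```sql
+  CREATE TABLE nums (`1` UInt8, `2` UInt8) ENGINE = Memory;
+  SELECT * FROM nums WHERE 1;  -- a constant WHERE condition previously could throw an exception here
+  ```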
+* Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that might cause a significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)).
+* Prevent hedged connections overlaps (`Unknown packet 9 from server` error). [#21941](https://github.com/ClickHouse/ClickHouse/pull/21941) ([Azat Khuzhin](https://github.com/azat)).
+* Fix reading the HTTP POST request with "multipart/form-data" content type in some cases. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)).
+* Fix wrong `ORDER BY` results when a query contains window functions and the optimization for reading in primary key order is applied. Fixes [#21828](https://github.com/ClickHouse/ClickHouse/issues/21828). [#21915](https://github.com/ClickHouse/ClickHouse/pull/21915) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Fixed a deadlock in the first CatBoost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix an incorrect query result (and possible crash) which could happen when a `WHERE` or `HAVING` condition is pushed before `GROUP BY`. Fixes [#21773](https://github.com/ClickHouse/ClickHouse/issues/21773). [#21841](https://github.com/ClickHouse/ClickHouse/pull/21841) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Better error handling and logging in `WriteBufferFromS3`. [#21836](https://github.com/ClickHouse/ClickHouse/pull/21836) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Fix possible crashes in aggregate functions with the combinator `Distinct` while using two-level aggregation. This is a follow-up fix of [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365). It could only be reproduced in a production environment. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)).
+* Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717), which was introduced in [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896). [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)).
+* Fix a bug for `ReplicatedMergeTree` table engines when an `ALTER MODIFY COLUMN` query doesn't change the type of a `Decimal` column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)).
+* Fix possible infinite waiting when concurrent `OPTIMIZE` and `DROP` are run for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)).
+* Fix function `arrayElement` with type `Map` for constant integer arguments (see the sketch below). [#21699](https://github.com/ClickHouse/ClickHouse/pull/21699) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix SIGSEGV on non-existing attributes from `ip_trie` with `access_to_key_from_attributes`. [#21692](https://github.com/ClickHouse/ClickHouse/pull/21692) ([Azat Khuzhin](https://github.com/azat)).
+* The server now starts accepting connections only after `DDLWorker` and dictionary initialization. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)).
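+  A sketch for the `arrayElement`-on-`Map` entry above (the `Map` type was experimental at the time, hence the setting):
+
+  ```sql
+  SET allow_experimental_map_type = 1;
+  SELECT map(1, 'one', 2, 'two')[1];  -- constant integer key; returns 'one'
+  ```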
+* Add type conversion for keys of tables of type `Join` (previously it led to SIGSEGV). [#21646](https://github.com/ClickHouse/ClickHouse/pull/21646) ([Azat Khuzhin](https://github.com/azat)).
+* Fix distributed request cancellation (for example a simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `fsync_part_directory` for horizontal merge. [#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)).
+* Remove unknown columns from the joined table in `WHERE` for queries to external database engines (MySQL, PostgreSQL). Closes [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), closes [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), closes [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir](https://github.com/vdimir)).
+* `std::terminate` was called if there was an error writing data into S3. [#21624](https://github.com/ClickHouse/ClickHouse/pull/21624) ([Vladimir](https://github.com/vdimir)).
+* Fix possible error `Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)).
+* If a query has a constant `WHERE` condition and the setting `optimize_skip_unused_shards` is enabled, all shards could be skipped and the query could return an incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)).
+* Fix table function `clusterAllReplicas` returning a wrong `_shard_num`. Closes [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasFL)).
+* Fix S3 tables holding old credentials after a config update. [#21457](https://github.com/ClickHouse/ClickHouse/pull/21457) ([Grigory Pervakov](https://github.com/GrigoryPervakov)).
+* Fixed a race on the SSL object inside `SecureSocket` in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix `Avro` format parsing for `Kafka`. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)).
+* Fix receive and send timeouts and non-blocking read in secure sockets. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)).
+* The `force_drop_table` flag didn't work for `MATERIALIZED VIEW`; it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([tavplubix](https://github.com/tavplubix)).
+* Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after FULL JOIN. Closes [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)).
+
+#### Build/Testing/Packaging Improvement
+
+* Add [Jepsen](https://github.com/jepsen-io/jepsen) tests for ClickHouse Keeper. [#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)).
+* Run stateless tests in parallel in CI. Depends on [#22181](https://github.com/ClickHouse/ClickHouse/issues/22181). [#22300](https://github.com/ClickHouse/ClickHouse/pull/22300) ([alesapin](https://github.com/alesapin)).
+* Enable status check for [SQLancer](https://github.com/sqlancer/sqlancer) CI runs. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)).
+* Multiple preparations for PowerPC builds: Enable the bundled openldap on `ppc64le`. [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable compiling on `ppc64le` with Clang. [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix compiling boost on `ppc64le`. [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix a CMake error about the internal CMake variable `CMAKE_ASM_COMPILE_OBJECT` not set on `ppc64le`. [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix Fedora/RHEL/CentOS not finding `libclang_rt.builtins` on `ppc64le`. [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable building with `jemalloc` on `ppc64le`. [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix ClickHouse's config embedding and cctz's timezone embedding on `ppc64le`. [#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). Fixed compiling on `ppc64le` and use the correct instruction pointer register on `ppc64le`. [#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)).
+* Re-enable the S3 (AWS) library on `aarch64`. [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)).
+* Add `tzdata` to Docker containers because reading `ORC` formats requires it. This closes [#14156](https://github.com/ClickHouse/ClickHouse/issues/14156). [#22000](https://github.com/ClickHouse/ClickHouse/pull/22000) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Introduce 2 arguments for the `clickhouse-server` image Dockerfile: `deb_location` & `single_binary_location`. [#21977](https://github.com/ClickHouse/ClickHouse/pull/21977) ([filimonov](https://github.com/filimonov)).
+* Allow using clang-tidy with release builds by enabling assertions if it is used. [#21914](https://github.com/ClickHouse/ClickHouse/pull/21914) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add llvm-12 binary names to search for in CMake scripts. Implicit constant conversions to mute clang warnings. Updated submodules to build with CMake 3.19. Mute recursion in macro expansion in the `readpassphrase` library. The deprecated `-fuse-ld` changed to `--ld-path` for clang. [#21597](https://github.com/ClickHouse/ClickHouse/pull/21597) ([Ilya Yatsishin](https://github.com/qoega)).
+* Update `docker/test/testflows/runner/dockerd-entrypoint.sh` to use the Yandex dockerhub-proxy, because Docker Hub has enabled very restrictive rate limits. [#21551](https://github.com/ClickHouse/ClickHouse/pull/21551) ([vzakaznikov](https://github.com/vzakaznikov)).
+* Fix macOS shared lib build. [#20184](https://github.com/ClickHouse/ClickHouse/pull/20184) ([nvartolomei](https://github.com/nvartolomei)).
+* Add a `ctime` option to `zookeeper-dump-tree`. It allows dumping node creation time. [#21842](https://github.com/ClickHouse/ClickHouse/pull/21842) ([Ilya](https://github.com/HumanUser)).
+
+
+## ClickHouse release 21.3 (LTS)
+
+### ClickHouse release v21.3, 2021-03-12
+
+#### Backward Incompatible Change
+
+* Now it's not allowed to create MergeTree tables in the old syntax with table TTL because it's just ignored. Attaching old tables is still possible. [#20282](https://github.com/ClickHouse/ClickHouse/pull/20282) ([alesapin](https://github.com/alesapin)).
+* Now all case-insensitive function names will be rewritten to their canonical representations. This is needed for projection query routing (the upcoming feature). [#20174](https://github.com/ClickHouse/ClickHouse/pull/20174) ([Amos Bird](https://github.com/amosbird)).
+* Fix creation of `TTL` in cases when its expression is a function and it is the same as the `ORDER BY` key. Now it's allowed to set custom aggregation for primary key columns in `TTL` with `GROUP BY`. Backward incompatible: for primary key columns which are not in `GROUP BY` and aren't set explicitly, the function `any` is now applied instead of `max` when the TTL expires. Also, if you use TTL with `WHERE` or `GROUP BY`, you may see exceptions during merges while performing a rolling update. [#15450](https://github.com/ClickHouse/ClickHouse/pull/15450) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### New Feature
+
+* Add file engine settings: `engine_file_empty_if_not_exists` and `engine_file_truncate_on_insert`. [#20620](https://github.com/ClickHouse/ClickHouse/pull/20620) ([M0r64n](https://github.com/M0r64n)).
+* Add aggregate function `deltaSum` for summing the differences between consecutive rows (see the sketch below). [#20057](https://github.com/ClickHouse/ClickHouse/pull/20057) ([Russ Frank](https://github.com/rf)).
+* New `event_time_microseconds` column in the `system.part_log` table. [#20027](https://github.com/ClickHouse/ClickHouse/pull/20027) ([Bharat Nallan](https://github.com/bharatnc)).
+* Added the `timezoneOffset(datetime)` function which gives the offset from UTC in seconds. This closes [#19850](https://github.com/ClickHouse/ClickHouse/issues/19850). [#19962](https://github.com/ClickHouse/ClickHouse/pull/19962) ([keenwolf](https://github.com/keen-wolf)).
+* Add setting `insert_shard_id` to support inserting data into a specific shard from a distributed table. [#19961](https://github.com/ClickHouse/ClickHouse/pull/19961) ([flynn](https://github.com/ucasFL)).
+* Function `reinterpretAs` updated to support big integers. Fixes [#19691](https://github.com/ClickHouse/ClickHouse/issues/19691). [#19858](https://github.com/ClickHouse/ClickHouse/pull/19858) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added Server Side Encryption Customer Keys (the `x-amz-server-side-encryption-customer-(key/md5)` header) support in the S3 client. See [the link](https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html). Closes [#19428](https://github.com/ClickHouse/ClickHouse/issues/19428). [#19748](https://github.com/ClickHouse/ClickHouse/pull/19748) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Added the `implicit_key` option for the `executable` dictionary source. It allows avoiding printing the key for every record if records come in the same order as the input keys. Implements [#14527](https://github.com/ClickHouse/ClickHouse/issues/14527). [#19677](https://github.com/ClickHouse/ClickHouse/pull/19677) ([Maksim Kita](https://github.com/kitaisreal)).
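+  A sketch of `deltaSum` from the entry above (illustrative values; row order matters for this aggregate):
+
+  ```sql
+  SELECT deltaSum(x) FROM (SELECT arrayJoin([1, 2, 5, 7]) AS x);  -- (2-1) + (5-2) + (7-5) = 6
+  ```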
+* Add quota types `query_selects` and `query_inserts`. [#19603](https://github.com/ClickHouse/ClickHouse/pull/19603) ([JackyWoo](https://github.com/JackyWoo)).
+* Add function `extractTextFromHTML`. [#19600](https://github.com/ClickHouse/ClickHouse/pull/19600) ([zlx19950903](https://github.com/zlx19950903)), ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Tables with `MergeTree*` engine now have two new table-level settings for query concurrency control. Setting `max_concurrent_queries` limits the number of concurrently executed queries which are related to this table. Setting `min_marks_to_honor_max_concurrent_queries` tells to apply the previous setting only if the query reads at least this number of marks. [#19544](https://github.com/ClickHouse/ClickHouse/pull/19544) ([Amos Bird](https://github.com/amosbird)).
+* Added `file` function to read a file from the user_files directory as a String. This is different from the `file` table function. This implements [#18851](https://github.com/ClickHouse/ClickHouse/issues/18851). [#19204](https://github.com/ClickHouse/ClickHouse/pull/19204) ([keenwolf](https://github.com/keen-wolf)).
+
+#### Experimental feature
+
+* Add experimental `Replicated` database engine. It replicates DDL queries across multiple hosts. [#16193](https://github.com/ClickHouse/ClickHouse/pull/16193) ([tavplubix](https://github.com/tavplubix)).
+* Introduce experimental support for window functions, enabled with `allow_experimental_window_functions = 1`. This is a preliminary, alpha-quality implementation that is not suitable for production use and will change in backward-incompatible ways in future releases. Please see [the documentation](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/sql-reference/window-functions/index.md#experimental-window-functions) for the list of supported features. [#20337](https://github.com/ClickHouse/ClickHouse/pull/20337) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Add the ability to backup/restore metadata files for DiskS3. [#18377](https://github.com/ClickHouse/ClickHouse/pull/18377) ([Pavel Kovalenko](https://github.com/Jokser)).
+
+#### Performance Improvement
+
+* Hedged requests for remote queries. When the setting `use_hedged_requests` is enabled (off by default), allow to establish many connections with different replicas for a query. A new connection is opened in case existent connection(s) with replica(s) were not established within `hedged_connection_timeout` or no data was received within `receive_data_timeout`. The query uses the first connection which sends a non-empty progress packet (or a data packet, if `allow_changing_replica_until_first_data_packet`); other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported. [#19291](https://github.com/ClickHouse/ClickHouse/pull/19291) ([Kruglov Pavel](https://github.com/Avogar)). This allows to significantly reduce tail latencies on very large clusters.
+* Added support for `PREWHERE` (and enable the corresponding optimization) when tables have row-level security expressions specified. [#19576](https://github.com/ClickHouse/ClickHouse/pull/19576) ([Denis Glazachev](https://github.com/traceon)).
+* The setting `distributed_aggregation_memory_efficient` is enabled by default. It will lower memory usage and improve performance of distributed queries. [#20599](https://github.com/ClickHouse/ClickHouse/pull/20599) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of GROUP BY multiple fixed size keys. [#20472](https://github.com/ClickHouse/ClickHouse/pull/20472) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of aggregate functions by more strict aliasing. [#19946](https://github.com/ClickHouse/ClickHouse/pull/19946) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Speed up reading from `Memory` tables in extreme cases (when reading speed is on the order of 50 GB/sec) by simplification of the pipeline and (consequently) less lock contention in pipeline scheduling. [#20468](https://github.com/ClickHouse/ClickHouse/pull/20468) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Partially reimplement HTTP server to make fewer copies of incoming and outgoing data. It gives up to 1.5x performance improvement on inserting long records over HTTP. [#19516](https://github.com/ClickHouse/ClickHouse/pull/19516) ([Ivan](https://github.com/abyss7)).
+* Add `compress` setting for `Memory` tables. If it's enabled the table will use less RAM. On some machines and datasets it can also work faster on SELECT, but it is not always the case. This closes [#20093](https://github.com/ClickHouse/ClickHouse/issues/20093). Note: there are reasons why Memory tables can work slower than MergeTree: (1) lack of compression (2) static size of blocks (3) lack of indices and prewhere... [#20168](https://github.com/ClickHouse/ClickHouse/pull/20168) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Slightly better code in aggregation. [#20978](https://github.com/ClickHouse/ClickHouse/pull/20978) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add back `intDiv`/`modulo` specializations for better performance. This fixes [#21293](https://github.com/ClickHouse/ClickHouse/issues/21293). The regression was introduced in https://github.com/ClickHouse/ClickHouse/pull/18145. [#21307](https://github.com/ClickHouse/ClickHouse/pull/21307) ([Amos Bird](https://github.com/amosbird)).
+* Do not squash blocks too much on INSERT SELECT if inserting into a Memory table. In previous versions an inefficient data representation was created in the Memory table after INSERT SELECT. This closes [#13052](https://github.com/ClickHouse/ClickHouse/issues/13052). [#20169](https://github.com/ClickHouse/ClickHouse/pull/20169) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix at least one case when the DataType parser may have exponential complexity (found by fuzzer). This closes [#20096](https://github.com/ClickHouse/ClickHouse/issues/20096). [#20132](https://github.com/ClickHouse/ClickHouse/pull/20132) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Parallelize SELECT with FINAL for single part with level > 0 when the `do_not_merge_across_partitions_select_final` setting is 1. [#19375](https://github.com/ClickHouse/ClickHouse/pull/19375) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fill only requested columns when querying `system.parts` and `system.parts_columns`. Closes [#19570](https://github.com/ClickHouse/ClickHouse/issues/19570). [#21035](https://github.com/ClickHouse/ClickHouse/pull/21035) ([Anmol Arora](https://github.com/anmolarora)).
+* Perform algebraic optimizations of arithmetic expressions inside the `avg` aggregate function. Closes [#20092](https://github.com/ClickHouse/ClickHouse/issues/20092). [#20183](https://github.com/ClickHouse/ClickHouse/pull/20183) ([flynn](https://github.com/ucasFL)).
+
+#### Improvement
+
+* Case-insensitive compression methods for table functions. Also fixed the LZMA compression method which was checked in upper case. [#21416](https://github.com/ClickHouse/ClickHouse/pull/21416) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add two settings to delay or throw an error during insertion when there are too many inactive parts. This is useful when the server fails to clean up parts quickly enough. [#20178](https://github.com/ClickHouse/ClickHouse/pull/20178) ([Amos Bird](https://github.com/amosbird)).
+* Provide better compatibility for MySQL clients: 1. mysql jdbc, 2. mycli. [#21367](https://github.com/ClickHouse/ClickHouse/pull/21367) ([Amos Bird](https://github.com/amosbird)).
+* Forbid to drop a column if it's referenced by a materialized view. Closes [#21164](https://github.com/ClickHouse/ClickHouse/issues/21164). [#21303](https://github.com/ClickHouse/ClickHouse/pull/21303) ([flynn](https://github.com/ucasFL)).
+* MySQL dictionary source will now retry unexpected connection failures (Lost connection to MySQL server during query) which sometimes happen on SSL/TLS connections. [#21237](https://github.com/ClickHouse/ClickHouse/pull/21237) ([Alexander Kazakov](https://github.com/Akazz)).
+* Usability improvement: more consistent `DateTime64` parsing: recognize the case when a unix timestamp with subsecond resolution is specified as a scaled integer (like `1111111111222` instead of `1111111111.222`). This closes [#13194](https://github.com/ClickHouse/ClickHouse/issues/13194). [#21053](https://github.com/ClickHouse/ClickHouse/pull/21053) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Do only merging of sorted blocks on the initiator with `distributed_group_by_no_merge`. [#20882](https://github.com/ClickHouse/ClickHouse/pull/20882) ([Azat Khuzhin](https://github.com/azat)).
+* When loading the config for a mysql source ClickHouse will now randomize the list of replicas with the same priority to ensure the round-robin logic of picking a mysql endpoint. This closes [#20629](https://github.com/ClickHouse/ClickHouse/issues/20629). [#20632](https://github.com/ClickHouse/ClickHouse/pull/20632) ([Alexander Kazakov](https://github.com/Akazz)).
+* Function `reinterpretAs(x, Type)` renamed to `reinterpret(x, Type)` (see the sketch below). [#20611](https://github.com/ClickHouse/ClickHouse/pull/20611) ([Maksim Kita](https://github.com/kitaisreal)).
+* Support vhost for RabbitMQ engine [#20576](https://github.com/ClickHouse/ClickHouse/issues/20576). [#20596](https://github.com/ClickHouse/ClickHouse/pull/20596) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Improved serialization for data types combined of Arrays and Tuples. Improved matching enum data types to protobuf enum type. Fixed serialization of the `Map` data type. Omitted values are now set by default. [#20506](https://github.com/ClickHouse/ClickHouse/pull/20506) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fixed a race between execution of distributed DDL tasks and cleanup of the DDL queue. Now a DDL task cannot be removed from ZooKeeper if there are active workers. Fixes [#20016](https://github.com/ClickHouse/ClickHouse/issues/20016). [#20448](https://github.com/ClickHouse/ClickHouse/pull/20448) ([tavplubix](https://github.com/tavplubix)).
+* Make FQDN and other DNS related functions work correctly in alpine images. [#20336](https://github.com/ClickHouse/ClickHouse/pull/20336) ([filimonov](https://github.com/filimonov)).
+* Do not allow early constant folding of explicitly forbidden functions. [#20303](https://github.com/ClickHouse/ClickHouse/pull/20303) ([Azat Khuzhin](https://github.com/azat)).
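+
+To illustrate the `reinterpretAs` → `reinterpret` rename above, a minimal sketch:
+
+```sql
+-- Same bytes, new type: Int8 -1 (0xFF) reinterpreted as UInt8 is 255.
+SELECT reinterpret(toInt8(-1), 'UInt8');
+```
+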
+* Implicit conversion from integer to Decimal type could succeed even if the integer value did not fit into the Decimal type. Now it throws `ARGUMENT_OUT_OF_BOUND`. [#20232](https://github.com/ClickHouse/ClickHouse/pull/20232) ([tavplubix](https://github.com/tavplubix)).
+* Lockless `SYSTEM FLUSH DISTRIBUTED`. [#20215](https://github.com/ClickHouse/ClickHouse/pull/20215) ([Azat Khuzhin](https://github.com/azat)).
+* Normalize count(constant), sum(1) to count(). This is needed for projection query routing. [#20175](https://github.com/ClickHouse/ClickHouse/pull/20175) ([Amos Bird](https://github.com/amosbird)).
+* Support all native integer types in bitmap functions. [#20171](https://github.com/ClickHouse/ClickHouse/pull/20171) ([Amos Bird](https://github.com/amosbird)).
+* Updated `CacheDictionary`, `ComplexCacheDictionary`, `SSDCacheDictionary`, `SSDComplexKeyDictionary` to use LRUHashMap as the underlying index. [#20164](https://github.com/ClickHouse/ClickHouse/pull/20164) ([Maksim Kita](https://github.com/kitaisreal)).
+* The setting `access_management` is now configurable on startup by providing `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT`, defaults to disabled (`0`) which was the prior value. [#20139](https://github.com/ClickHouse/ClickHouse/pull/20139) ([Marquitos](https://github.com/sonirico)).
+* Fix `toDateTime64(toDate()/toDateTime())` for `DateTime64`: implement `DateTime64` clamping to match `DateTime` behaviour. [#20131](https://github.com/ClickHouse/ClickHouse/pull/20131) ([Azat Khuzhin](https://github.com/azat)).
+* Quota improvements: SHOW TABLES is now considered as one query in the quota calculations, not two queries. SYSTEM queries now consume quota. Fix calculation of the interval's end in quota consumption. [#20106](https://github.com/ClickHouse/ClickHouse/pull/20106) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Support `path IN (set)` expressions for the `system.zookeeper` table (see the sketch below). [#20105](https://github.com/ClickHouse/ClickHouse/pull/20105) ([小路](https://github.com/nicelulu)).
+* Show full details of `MaterializeMySQL` tables in `system.tables`. [#20051](https://github.com/ClickHouse/ClickHouse/pull/20051) ([Stig Bakken](https://github.com/stigsb)).
+* Fix a data race in the executable dictionary that was possible only on misuse (when the script returns data ignoring its input). [#20045](https://github.com/ClickHouse/ClickHouse/pull/20045) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* The value of the MYSQL_OPT_RECONNECT option can now be controlled by the "opt_reconnect" parameter in the config section of a mysql replica. [#19998](https://github.com/ClickHouse/ClickHouse/pull/19998) ([Alexander Kazakov](https://github.com/Akazz)).
+* If the user calls the `JSONExtract` function with `Float32` type requested, allow inaccurate conversion to the result type. For example the number `0.1` in JSON is double precision and is not representable in Float32, but the user still wants to get it. Previous versions return 0 for a non-Nullable type and NULL for a Nullable type to indicate that the conversion is imprecise. The logic was 100% correct but it was surprising to users and leading to questions. This closes [#13962](https://github.com/ClickHouse/ClickHouse/issues/13962). [#19960](https://github.com/ClickHouse/ClickHouse/pull/19960) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add conversion of block structure for INSERT into Distributed tables if it does not match. [#19947](https://github.com/ClickHouse/ClickHouse/pull/19947) ([Azat Khuzhin](https://github.com/azat)).
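+
+A short sketch of the `path IN (set)` support for `system.zookeeper` (the ZooKeeper paths are hypothetical):
+
+```sql
+-- Previously only `path = '...'` was supported; a set of paths now works too.
+SELECT name, path
+FROM system.zookeeper
+WHERE path IN ('/clickhouse/task_queue', '/clickhouse/tables');
+```
+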
+* Improvement for the `system.distributed_ddl_queue` table. Initialize MaxDDLEntryID to the last value after restarting. Before this PR, MaxDDLEntryID would remain zero until a new DDLTask is processed. [#19924](https://github.com/ClickHouse/ClickHouse/pull/19924) ([Amos Bird](https://github.com/amosbird)).
+* Show `MaterializeMySQL` tables in `system.parts`. [#19770](https://github.com/ClickHouse/ClickHouse/pull/19770) ([Stig Bakken](https://github.com/stigsb)).
+* Add separate config directive for `Buffer` profile. [#19721](https://github.com/ClickHouse/ClickHouse/pull/19721) ([Azat Khuzhin](https://github.com/azat)).
+* Move conditions that are not related to JOIN to the WHERE clause. [#18720](https://github.com/ClickHouse/ClickHouse/issues/18720). [#19685](https://github.com/ClickHouse/ClickHouse/pull/19685) ([hexiaoting](https://github.com/hexiaoting)).
+* Add ability to throttle INSERT into Distributed based on the amount of pending bytes for async send (`bytes_to_delay_insert`/`max_delay_to_insert` and `bytes_to_throw_insert` settings for the `Distributed` engine have been added). [#19673](https://github.com/ClickHouse/ClickHouse/pull/19673) ([Azat Khuzhin](https://github.com/azat)).
+* Fix some rare cases when write errors can be ignored in destructors. [#19451](https://github.com/ClickHouse/ClickHouse/pull/19451) ([Azat Khuzhin](https://github.com/azat)).
+* Print inline frames in stack traces for fatal errors. [#19317](https://github.com/ClickHouse/ClickHouse/pull/19317) ([Ivan](https://github.com/abyss7)).
+
+#### Bug Fix
+
+* Fix redundant reconnects to ZooKeeper and the possibility of two active sessions for a single clickhouse server. Both problems were introduced in #14678. [#21264](https://github.com/ClickHouse/ClickHouse/pull/21264) ([alesapin](https://github.com/alesapin)).
+* Fix error `Bad cast from type ... to DB::ColumnLowCardinality` while inserting into a table with a `LowCardinality` column from `Values` format. Fixes #21140. [#21357](https://github.com/ClickHouse/ClickHouse/pull/21357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a deadlock in `ALTER DELETE` mutations for non replicated MergeTree table engines when the predicate contains the table itself. Fixes [#20558](https://github.com/ClickHouse/ClickHouse/issues/20558). [#21477](https://github.com/ClickHouse/ClickHouse/pull/21477) ([alesapin](https://github.com/alesapin)).
+* Fix SIGSEGV for distributed queries on failures. [#21434](https://github.com/ClickHouse/ClickHouse/pull/21434) ([Azat Khuzhin](https://github.com/azat)).
+* Now `ALTER MODIFY COLUMN` queries will correctly affect changes in the partition key, skip indices, TTLs, and so on. Fixes [#13675](https://github.com/ClickHouse/ClickHouse/issues/13675). [#21334](https://github.com/ClickHouse/ClickHouse/pull/21334) ([alesapin](https://github.com/alesapin)).
+* Fix bug with `join_use_nulls` and joining `TOTALS` from subqueries. This closes [#19362](https://github.com/ClickHouse/ClickHouse/issues/19362) and [#21137](https://github.com/ClickHouse/ClickHouse/issues/21137). [#21248](https://github.com/ClickHouse/ClickHouse/pull/21248) ([vdimir](https://github.com/vdimir)).
+* Fix crash in `EXPLAIN` for query with `UNION` (see the example below). Fixes [#20876](https://github.com/ClickHouse/ClickHouse/issues/20876), [#21170](https://github.com/ClickHouse/ClickHouse/issues/21170). [#21246](https://github.com/ClickHouse/ClickHouse/pull/21246) ([flynn](https://github.com/ucasFL)).
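+
+For reference, a minimal query of the shape that used to trigger the `EXPLAIN` + `UNION` crash mentioned above:
+
+```sql
+-- Previously crashed; after the fix it simply prints the query plan.
+EXPLAIN SELECT 1 UNION ALL SELECT 2;
+```
+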
+* Now mutations are allowed only for table engines that support them (MergeTree family, Memory, MaterializedView). Other engines will report a clearer error. Fixes [#21168](https://github.com/ClickHouse/ClickHouse/issues/21168). [#21183](https://github.com/ClickHouse/ClickHouse/pull/21183) ([alesapin](https://github.com/alesapin)).
+* Fixes [#21112](https://github.com/ClickHouse/ClickHouse/issues/21112). Fixed bug that could cause duplicates with insert query (if one of the callbacks came a little too late). [#21138](https://github.com/ClickHouse/ClickHouse/pull/21138) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix `input_format_null_as_default` to take effect when types are nullable. This fixes [#21116](https://github.com/ClickHouse/ClickHouse/issues/21116). [#21121](https://github.com/ClickHouse/ClickHouse/pull/21121) ([Amos Bird](https://github.com/amosbird)).
+* Fix bug related to casting Tuple to Map. Closes [#21029](https://github.com/ClickHouse/ClickHouse/issues/21029). [#21120](https://github.com/ClickHouse/ClickHouse/pull/21120) ([hexiaoting](https://github.com/hexiaoting)).
+* Fix the metadata leak when the Replicated*MergeTree with a custom (non-default) ZooKeeper cluster is dropped. [#21119](https://github.com/ClickHouse/ClickHouse/pull/21119) ([fastio](https://github.com/fastio)).
+* Fix type mismatch issue when using LowCardinality keys in joinGet (see the sketch below). This fixes [#21114](https://github.com/ClickHouse/ClickHouse/issues/21114). [#21117](https://github.com/ClickHouse/ClickHouse/pull/21117) ([Amos Bird](https://github.com/amosbird)).
+* Fix `default_replica_path` and `default_replica_name` values being useless on the Replicated(*)MergeTree engine when the engine needs to specify other parameters. [#21060](https://github.com/ClickHouse/ClickHouse/pull/21060) ([mxzlxy](https://github.com/mxzlxy)).
+* Out of bound memory access was possible when formatting a specifically crafted out of range value of type `DateTime64`. This closes [#20494](https://github.com/ClickHouse/ClickHouse/issues/20494). This closes [#20543](https://github.com/ClickHouse/ClickHouse/issues/20543). [#21023](https://github.com/ClickHouse/ClickHouse/pull/21023) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Block parallel insertions into storage join. [#21009](https://github.com/ClickHouse/ClickHouse/pull/21009) ([vdimir](https://github.com/vdimir)).
+* Fixed behaviour when `ALTER MODIFY COLUMN` created a mutation that would knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)).
+* Closes [#9969](https://github.com/ClickHouse/ClickHouse/issues/9969). Fixed Brotli HTTP compression error, which reproduced for large data sizes, slightly complicated structure and with JSON output format. Update Brotli to the latest version to include the "fix rare access to uninitialized data in ring-buffer". [#20991](https://github.com/ClickHouse/ClickHouse/pull/20991) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix 'Empty task was returned from async task queue' on query cancellation. [#20881](https://github.com/ClickHouse/ClickHouse/pull/20881) ([Azat Khuzhin](https://github.com/azat)).
+* `USE database;` query did not work when using MySQL 5.7 client to connect to ClickHouse server, it's fixed. Fixes [#18926](https://github.com/ClickHouse/ClickHouse/issues/18926). [#20878](https://github.com/ClickHouse/ClickHouse/pull/20878) ([tavplubix](https://github.com/tavplubix)).
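+
+A minimal sketch of the `joinGet` + `LowCardinality` case fixed above (the table and values are hypothetical):
+
+```sql
+CREATE TABLE currency_rates (code LowCardinality(String), rate Float64)
+ENGINE = Join(ANY, LEFT, code);
+
+INSERT INTO currency_rates VALUES ('USD', 1.0), ('EUR', 1.09);
+
+-- The type mismatch with LowCardinality keys is what the fix addresses.
+SELECT joinGet('currency_rates', 'rate', toLowCardinality('EUR'));
+```
+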
+* Fix usage of `-Distinct` combinator with `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix subquery with `UNION DISTINCT` and `LIMIT` clause. Closes [#20597](https://github.com/ClickHouse/ClickHouse/issues/20597). [#20610](https://github.com/ClickHouse/ClickHouse/pull/20610) ([flynn](https://github.com/ucasFL)).
+* Fixed inconsistent behavior of dictionary in case of queries where we look for absent keys in dictionary. [#20578](https://github.com/ClickHouse/ClickHouse/pull/20578) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix the number of threads for scalar subqueries and subqueries for index (after [#19007](https://github.com/ClickHouse/ClickHouse/issues/19007) single thread was always used). Fixes [#20457](https://github.com/ClickHouse/ClickHouse/issues/20457), [#20512](https://github.com/ClickHouse/ClickHouse/issues/20512). [#20550](https://github.com/ClickHouse/ClickHouse/pull/20550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix crash which could happen if an unknown packet was received from a remote query (was introduced in [#17868](https://github.com/ClickHouse/ClickHouse/issues/17868)). [#20547](https://github.com/ClickHouse/ClickHouse/pull/20547) ([Azat Khuzhin](https://github.com/azat)).
+* Add proper checks while parsing directory names for async INSERT (fixes SIGSEGV). [#20498](https://github.com/ClickHouse/ClickHouse/pull/20498) ([Azat Khuzhin](https://github.com/azat)).
+* Fix function `transform` not working properly for floating point keys. Closes [#20460](https://github.com/ClickHouse/ClickHouse/issues/20460). [#20479](https://github.com/ClickHouse/ClickHouse/pull/20479) ([flynn](https://github.com/ucasFL)).
+* Fix infinite loop when propagating WITH aliases to subqueries. This fixes [#20388](https://github.com/ClickHouse/ClickHouse/issues/20388). [#20476](https://github.com/ClickHouse/ClickHouse/pull/20476) ([Amos Bird](https://github.com/amosbird)).
+* Fix abnormal server termination when the HTTP client goes away. [#20464](https://github.com/ClickHouse/ClickHouse/pull/20464) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `LOGICAL_ERROR` for `join_use_nulls=1` when JOIN contains const from SELECT. [#20461](https://github.com/ClickHouse/ClickHouse/pull/20461) ([Azat Khuzhin](https://github.com/azat)).
+* Check if table function `view` is used in expression list and throw an error. This fixes [#20342](https://github.com/ClickHouse/ClickHouse/issues/20342). [#20350](https://github.com/ClickHouse/ClickHouse/pull/20350) ([Amos Bird](https://github.com/amosbird)).
+* Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)).
+* Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)).
+* Fix incorrect result of binary operations between two constant decimals of different scale. Fixes [#20283](https://github.com/ClickHouse/ClickHouse/issues/20283). [#20339](https://github.com/ClickHouse/ClickHouse/pull/20339) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix too frequent retries of failed background tasks for `ReplicatedMergeTree` table engines family. This could lead to too verbose logging and increased CPU load. Fixes [#20203](https://github.com/ClickHouse/ClickHouse/issues/20203). [#20335](https://github.com/ClickHouse/ClickHouse/pull/20335) ([alesapin](https://github.com/alesapin)).
+* Restrict `DROP` or `RENAME` of the version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)).
+* Fixed the behavior when in case of broken JSON we tried to read the whole file into memory, which led to an exception from the allocator. Fixes [#19719](https://github.com/ClickHouse/ClickHouse/issues/19719). [#20286](https://github.com/ClickHouse/ClickHouse/pull/20286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix exception during vertical merge for `MergeTree` table engines family which don't allow to perform vertical merges. Fixes [#20259](https://github.com/ClickHouse/ClickHouse/issues/20259). [#20279](https://github.com/ClickHouse/ClickHouse/pull/20279) ([alesapin](https://github.com/alesapin)).
+* Fix rare server crash on config reload during the shutdown. Fixes [#19689](https://github.com/ClickHouse/ClickHouse/issues/19689). [#20224](https://github.com/ClickHouse/ClickHouse/pull/20224) ([alesapin](https://github.com/alesapin)).
+* Fix CTE when used in INSERT SELECT. This fixes [#20187](https://github.com/ClickHouse/ClickHouse/issues/20187), fixes [#20195](https://github.com/ClickHouse/ClickHouse/issues/20195). [#20211](https://github.com/ClickHouse/ClickHouse/pull/20211) ([Amos Bird](https://github.com/amosbird)).
+* Fixes [#19314](https://github.com/ClickHouse/ClickHouse/issues/19314). [#20156](https://github.com/ClickHouse/ClickHouse/pull/20156) ([Ivan](https://github.com/abyss7)).
+* Fix `toMinute` function to handle special timezones correctly. [#20149](https://github.com/ClickHouse/ClickHouse/pull/20149) ([keenwolf](https://github.com/keen-wolf)).
+* Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)).
+* The `MongoDB` table engine now establishes connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Bugfix in StorageJoin. [#20079](https://github.com/ClickHouse/ClickHouse/pull/20079) ([vdimir](https://github.com/vdimir)).
+* Fix the case when calculating modulo of division of negative number by small divisor, the resulting data type was not large enough to accommodate the negative result. This closes [#20052](https://github.com/ClickHouse/ClickHouse/issues/20052). [#20067](https://github.com/ClickHouse/ClickHouse/pull/20067) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* MaterializeMySQL: Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)).
+* Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)).
+* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)).
+* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)).
+* Deadlock was possible if `system.text_log` is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)).
+* Fix a bug where moving pieces to the destination table could fail when multiple clickhouse-copiers were launched. [#19743](https://github.com/ClickHouse/ClickHouse/pull/19743) ([madianjun](https://github.com/mdianjun)).
+* Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)).
+
+#### Build/Testing/Packaging Improvement
+
+* Allow to build ClickHouse with AVX-2 enabled globally. It gives slight performance benefits on modern CPUs. Not recommended for production and will not be supported as official build for now. [#20180](https://github.com/ClickHouse/ClickHouse/pull/20180) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix some of the issues found by Coverity. See [#19964](https://github.com/ClickHouse/ClickHouse/issues/19964). [#20010](https://github.com/ClickHouse/ClickHouse/pull/20010) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow to start up with modified binary under gdb. In previous versions, if you set up a breakpoint in gdb before start, the server would refuse to start due to a failed integrity check. [#21258](https://github.com/ClickHouse/ClickHouse/pull/21258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add a test for different compression methods in Kafka. [#21111](https://github.com/ClickHouse/ClickHouse/pull/21111) ([filimonov](https://github.com/filimonov)).
+* Fixed port clash from test_storage_kerberized_hdfs test. [#19974](https://github.com/ClickHouse/ClickHouse/pull/19974) ([Ilya Yatsishin](https://github.com/qoega)).
+* Print `stdout` and `stderr` to log when failed to start docker in integration tests. Before this PR there was a very short error message in this case which didn't help to investigate the problems. [#20631](https://github.com/ClickHouse/ClickHouse/pull/20631) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+
+## ClickHouse release 21.2
+
+### ClickHouse release v21.2.2.8-stable, 2021-02-07
+
+#### Backward Incompatible Change
+
+* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)).
+* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)).
+* Fix memory tracking for `OPTIMIZE TABLE`/merges; account query memory limits and sampling for `OPTIMIZE TABLE`/merges. [#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)).
+* Disallow floating point column as partition key, see [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421#event-4147046255). [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)).
+* Excessive parentheses in type definitions are no longer supported, example: `Array((UInt8))`.
+
+#### New Feature
+
+* Added `PostgreSQL` table engine (both select/insert, with support for multidimensional arrays), also as table function. Added `PostgreSQL` dictionary source. Added `PostgreSQL` database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Data type `Nested` now supports arbitrary levels of nesting. Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, names of `Tuple` elements, which can be read without reading the whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)).
+* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)).
+* Adds a new table called `system.distributed_ddl_queue` that displays the queries in the DDL worker queue. [#17656](https://github.com/ClickHouse/ClickHouse/pull/17656) ([Bharat Nallan](https://github.com/bharatnc)).
+* Added support of mapping LDAP group names, and attribute values in general, to local roles for users from ldap user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)).
+* Support insert into table function `cluster`, and for both table functions `remote` and `cluster`, support distributing data across nodes by specifying a sharding key. Closes [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasFL)).
+* Add function `decodeXMLComponent` to decode characters for XML. Example: `SELECT decodeXMLComponent('Hello,"world"!')` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). [#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)).
+* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. [#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasFL)).
+* Add information about used features (functions, table engines, etc) into `system.query_log`. [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Function `formatDateTime` supports the `%Q` modification to format date to quarter. [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)).
+* Support MetaKey+Enter hotkey binding in play UI. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)).
+* Add three functions for the map data type (see the sketch below): 1. `mapContains(map, key)` to check whether the map's keys include the second parameter key. 2. `mapKeys(map)` returns all the keys in Array format. 3. `mapValues(map)` returns all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)).
+* Add `log_comment` setting related to [#18494](https://github.com/ClickHouse/ClickHouse/issues/18494). [#18549](https://github.com/ClickHouse/ClickHouse/pull/18549) ([Zijie Lu](https://github.com/TszKitLo40)).
+* Add support of tuple argument to `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)).
+* Add `SELECT ALL` syntax. Closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasFL)).
+
+#### Performance Improvement
+
+* Faster parts removal by lowering the number of `stat` syscalls. This returns the optimization that existed a while ago. Safer interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Aliases declared in `WITH` statement are properly used in index analysis. Queries like `WITH column AS alias SELECT ... WHERE alias = ...` may use index now. [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896) ([Amos Bird](https://github.com/amosbird)).
+* Add `optimize_alias_column_prediction` (on by default), that will: - Respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes; - Respect aliased columns in WHERE for trivial count queries for optimize_trivial_count; - Respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)).
+* Speed up aggregate function `sum`. Improvement only visible on synthetic benchmarks and not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Update libc++ and use another ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Danila Kutenin](https://github.com/danlark1)).
+* Rewrite `sumIf()` and `sum(if())` function to `countIf()` function when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasFL)).
+* Use a connection pool for S3 connections, controlled by the `s3_max_connections` setting. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add support for zstd long option for better compression of string columns to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)).
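+
+A quick sketch of the three map functions added above (assuming the experimental `Map` type is enabled via `allow_experimental_map_type` in this release line):
+
+```sql
+SET allow_experimental_map_type = 1;
+
+WITH CAST((['k1', 'k2'], [10, 20]), 'Map(String, UInt8)') AS m
+SELECT mapContains(m, 'k1') AS has_k1, -- 1
+       mapKeys(m) AS ks,               -- ['k1','k2']
+       mapValues(m) AS vs;             -- [10,20]
+```
+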
+* Slightly improve server latency by removing access to configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Reduce lock contention for multiple layers of the `Buffer` engine. [#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)).
+* Support splitting `Filter` step of query plan into `Expression + Filter` pair. Together with `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution for some expressions after `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
+#### Improvement
+
+* `SELECT count() FROM table` now can be executed if at least one column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Set charset to `utf8mb4` when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* `S3` table function now supports `auto` compression mode (autodetect). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). [#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Correctly output infinite arguments for `formatReadableTimeDelta` function. In previous versions, there was implicit conversion to implementation specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Table function `S3` will use global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* In distributed queries if the setting `async_socket_for_remote` is enabled, it was possible to get stack overflow at least in debug build configuration if a very deeply nested data type is used in a table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces minor backward incompatibility: excessive parentheses in type definitions are no longer supported, example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)).
+* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. [#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)).
+* Dictionary: better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add an option to disable validation of checksums on reading. Should never be used in production. Please do not expect any benefits in disabling it. It may only be used for experiments and benchmarks. The setting is only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over the network. In my observations there is no performance difference or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support constant result in function `multiIf`. [#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)).
+* Enable functions length/empty/notEmpty for the Map data type; they return the number of keys in the Map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([taiyang-li](https://github.com/taiyang-li)).
+* Add `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support using the new location of `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)).
+* `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add `http_referer` field to `system.query_log`, `system.processes`, etc. This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve MySQL compatibility by making more functions case insensitive and adding aliases. [#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)).
+* Add metrics for MergeTree parts (Wide/Compact/InMemory) types. [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)).
+* Allow docker to be executed with arbitrary uid. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)).
+* Fix wrong alignment of values of `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow changing `max_server_memory_usage` without restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* The exception when function `bar` is called with certain NaN argument may be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)).
+* Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([tavplubix](https://github.com/tavplubix)).
+* Docker image: several improvements for clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)).
+* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?` (see the sketch below). This helps better analyze complex query logs. [#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)).
+* Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid the INSERT getting stuck on the receiver (on truncated .bin file on the sender). Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information is included into the header; backward compatibility is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)).
+* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions exception about `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)).
+* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add [UInt8, UInt16, UInt32, UInt64] argument types support for bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)).
+* Allow CTE (Common Table Expressions) to be further aliased. Propagate CSE (Common Subexpressions Elimination) to subqueries in the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378). This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235. [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)).
+* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)).
+* In case of unexpected exceptions automatically restart background thread which is responsible for execution of distributed DDL queries. Fixes [#17991](https://github.com/ClickHouse/ClickHouse/issues/17991). [#18285](https://github.com/ClickHouse/ClickHouse/pull/18285) ([徐炘](https://github.com/weeds085490)).
+* Updated AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Added support for `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables. [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)).
+* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)).
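+
+To illustrate the `normalizeQueryKeepNames` entry above (the query text is made up):
+
+```sql
+-- Per the entry above, normalizeQuery may mask long names with `?`,
+-- while the KeepNames variants preserve them for query-log analysis.
+SELECT
+    normalizeQuery('SELECT long_column_name_1 FROM hits WHERE x = 1') AS masked,
+    normalizeQueryKeepNames('SELECT long_column_name_1 FROM hits WHERE x = 1') AS kept;
+```
+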
+
+#### Bug Fix
+
+* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)).
+* Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)).
+* `MaterializeMySQL` (experimental feature): Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)).
+* Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)).
+* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)).
+* The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). [#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)).
+* Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)).
+* Fix wrong deserialization of columns description. It made INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Mark distributed batch as broken in case of empty data block in one of the files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([tavplubix](https://github.com/tavplubix)).
+* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([vdimir](https://github.com/vdimir)).
+* Do not mark file for distributed send as broken on EOF. [#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)).
+* Fix leaking of pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)).
+* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix issue in merge tree data writer which can lead to marks with bigger size than fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)).
+* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and threw exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)).
+* Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length (see the sketch below). Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). [#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible error `Expected single dictionary argument for function` if using function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix minor issue in JOIN: Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)).
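+
+The generalized `tupleHammingDistance` mentioned above, in one line:
+
+```sql
+-- Tuples of any equal length are supported; exactly one position differs here.
+SELECT tupleHammingDistance((1, 2, 3), (1, 4, 3)); -- returns 1
+```
+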
+* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)).
+* Fixed rare crashes when the server runs out of memory. [#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([tavplubix](https://github.com/tavplubix)).
+* Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)).
+* Fixed issue [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894). Added a check to avoid an exception when a long column alias ('table.column' style, usually auto-generated by BI tools like Looker) equals a long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)).
+* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')`) was serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)).
+* ATTACH PARTITION will reset mutations. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)).
+* Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)).
+* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse a decimal from a nullable string (see the sketch below). Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)).
+* Fix data type conversion issue for MySQL engine. [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)).
+* Fix clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)).
+
+
+#### Build/Testing/Packaging Improvement
+
+* Run [SQLancer](https://twitter.com/RiggerManuel/status/1352345625480884228) (logical SQL fuzzer) in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)).
+* Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). [#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([alexey-milovidov](https://github.com/alexey-milovidov)).
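+
+A minimal reproduction of the `CAST` behaviour fixed above:
+
+```sql
+-- Previously threw 'Attempt to read after eof'; now returns NULL.
+SELECT CAST(CAST(NULL, 'Nullable(String)'), 'Nullable(Decimal(10, 2))');
+```
+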
+* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add integration tests run with MSan. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)). +* Fixed MemorySanitizer errors in cyrus-sasl and musl. [#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)). +* An insufficient arguments check in the `positionCaseInsensitiveUTF8` function triggered the address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove --project-directory for docker-compose in integration test. Fix logs formatting from docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)). +* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. The dicttoxml project has not been active for 5+ years. [#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)). +* Allow to explicitly enable or disable watchdog via environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if the server is not attached to a terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)). +* Allow building librdkafka without ssl. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)). +* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)). +* Fix potential nullptr dereference in table function `VALUES`. [#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Avoid UBSan reports in `arrayElement` function, `substring` and `arraySum`. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + +## ClickHouse release 21.1 + +### ClickHouse release v21.1.3.32-stable, 2021-02-03 + +#### Bug Fix + +* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)). +* Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)). +* In previous versions, unusual arguments for function arrayEnumerateUniq could cause a crash or an infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787).
[#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)). +* Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Some functions with big integers could cause a segfault. Big integers are an experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)). +* Fixed `CREATE DICTIONARY` query with an `id` expression. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure.
Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([tavplubix](https://github.com/tavplubix)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when concurrent `ALTER` and `DROP` queries could hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). + + + ### ClickHouse release v21.1.2.15-stable 2021-01-18 + +#### Backward Incompatible Change + +* The setting `input_format_null_as_default` is enabled by default. [#17525](https://github.com/ClickHouse/ClickHouse/pull/17525) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Check settings constraints for profile settings from config. Server will fail to start if users.xml contains settings that do not meet corresponding constraints. [#18486](https://github.com/ClickHouse/ClickHouse/pull/18486) ([tavplubix](https://github.com/tavplubix)). +* Restrict `ALTER MODIFY SETTING` from changing storage settings that affect data parts (`write_final_mark` and `enable_mixed_granularity_parts`). [#18306](https://github.com/ClickHouse/ClickHouse/pull/18306) ([Amos Bird](https://github.com/amosbird)). +* Set `insert_quorum_parallel` to 1 by default. It is significantly more convenient to use than "sequential" quorum inserts. But if you rely on sequential consistency, you should set the setting back to zero. [#17567](https://github.com/ClickHouse/ClickHouse/pull/17567) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove `sumburConsistentHash` function. This closes [#18120](https://github.com/ClickHouse/ClickHouse/issues/18120). [#18656](https://github.com/ClickHouse/ClickHouse/pull/18656) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Removed aggregate functions `timeSeriesGroupSum`, `timeSeriesGroupRateSum` because a friend of mine said they never worked. This fixes [#16869](https://github.com/ClickHouse/ClickHouse/issues/16869). If you had luck using these functions, write an email to clickhouse-feedback@yandex-team.com. [#17423](https://github.com/ClickHouse/ClickHouse/pull/17423) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Prohibit toUnixTimestamp(Date()) (before, it just returned the UInt16 representation of Date; see the example below). [#17376](https://github.com/ClickHouse/ClickHouse/pull/17376) ([Azat Khuzhin](https://github.com/azat)).
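+
+A short illustration of the behavior change (the explicit timezone argument is added only to make the example deterministic):
+
+```sql
+SELECT toUnixTimestamp(toDate('2021-01-18'));                       -- now throws instead of returning a UInt16 day number
+SELECT toUnixTimestamp(toDateTime('2021-01-18 00:00:00', 'UTC'));   -- convert explicitly to DateTime instead
+```
+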
+* Allow using extended integer types (`Int128`, `Int256`, `UInt256`) in `avg` and `avgWeighted` functions. Also allow using different types (integer, decimal, floating point) for value and for weight in `avgWeighted` function. This is a backward-incompatible change: now the `avg` and `avgWeighted` functions always return `Float64` (as documented). Before this change the return type for `Decimal` arguments was also `Decimal`. [#15419](https://github.com/ClickHouse/ClickHouse/pull/15419) ([Mike](https://github.com/myrrc)). +* Expression `toUUID(N)` no longer works. Replace with `toUUID('00000000-0000-0000-0000-000000000000')`. This change is motivated by non-obvious results of `toUUID(N)` where N is non-zero. +* SSL Certificates with incorrect "key usage" are rejected. In previous versions they used to work. See [#19262](https://github.com/ClickHouse/ClickHouse/issues/19262). +* `incl` references to substitutions file (`/etc/metrika.xml`) were removed from the default config. If you were using the substitutions file and were relying on those implicit references, you should put them back manually and explicitly by adding corresponding sections with `incl="..."` attributes before the update. See [#18740](https://github.com/ClickHouse/ClickHouse/pull/18740) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### New Feature + +* Implement gRPC protocol in ClickHouse. [#15111](https://github.com/ClickHouse/ClickHouse/pull/15111) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow to use multiple zookeeper clusters. [#17070](https://github.com/ClickHouse/ClickHouse/pull/17070) ([fastio](https://github.com/fastio)). +* Implemented `REPLACE TABLE` and `CREATE OR REPLACE TABLE` queries. [#18521](https://github.com/ClickHouse/ClickHouse/pull/18521) ([tavplubix](https://github.com/tavplubix)). +* Implement `UNION DISTINCT` and treat the plain `UNION` clause as `UNION DISTINCT` by default. Add a setting `union_default_mode` that allows to treat it as `UNION ALL` or require explicit mode specification. [#16338](https://github.com/ClickHouse/ClickHouse/pull/16338) ([flynn](https://github.com/ucasFL)). +* Added function `accurateCastOrNull`. This closes [#10290](https://github.com/ClickHouse/ClickHouse/issues/10290). Add type conversions in `x IN (subquery)` expressions. This closes [#10266](https://github.com/ClickHouse/ClickHouse/issues/10266). [#16724](https://github.com/ClickHouse/ClickHouse/pull/16724) ([Maksim Kita](https://github.com/kitaisreal)). +* IP Dictionary supports `IPv4` / `IPv6` types directly. [#17571](https://github.com/ClickHouse/ClickHouse/pull/17571) ([vdimir](https://github.com/vdimir)). +* IP Dictionary supports key fetching. Resolves [#18241](https://github.com/ClickHouse/ClickHouse/issues/18241). [#18480](https://github.com/ClickHouse/ClickHouse/pull/18480) ([vdimir](https://github.com/vdimir)). +* Add `*.zst` compression/decompression support for data import and export. It enables using `*.zst` in `file()` function and `Content-encoding: zstd` in HTTP client. This closes [#16791](https://github.com/ClickHouse/ClickHouse/issues/16791). [#17144](https://github.com/ClickHouse/ClickHouse/pull/17144) ([Abi Palagashvili](https://github.com/fibersel)). +* Added `mannWhitneyUTest`, `studentTTest` and `welchTTest` aggregate functions. Refactored `rankCorr` a bit. [#16883](https://github.com/ClickHouse/ClickHouse/pull/16883) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add functions `countMatches`/`countMatchesCaseInsensitive`.
[#17459](https://github.com/ClickHouse/ClickHouse/pull/17459) ([Azat Khuzhin](https://github.com/azat)). +* Implement `countSubstrings()`/`countSubstringsCaseInsensitive()`/`countSubstringsCaseInsensitiveUTF8()` (Count the number of substring occurrences). [#17347](https://github.com/ClickHouse/ClickHouse/pull/17347) ([Azat Khuzhin](https://github.com/azat)). +* Add information about used databases, tables and columns in system.query_log. Add `query_kind` and `normalized_query_hash` fields. [#17726](https://github.com/ClickHouse/ClickHouse/pull/17726) ([Amos Bird](https://github.com/amosbird)). +* Add a setting `optimize_on_insert`. When enabled, do the same transformation for INSERTed block of data as if merge was done on this block (e.g. Replacing, Collapsing, Aggregating...). This setting is enabled by default. This can influence Materialized View and MaterializeMySQL behaviour (see detailed description). This closes [#10683](https://github.com/ClickHouse/ClickHouse/issues/10683). [#16954](https://github.com/ClickHouse/ClickHouse/pull/16954) ([Kruglov Pavel](https://github.com/Avogar)). +* Kerberos Authentication for HDFS. [#16621](https://github.com/ClickHouse/ClickHouse/pull/16621) ([Ilya Golshtein](https://github.com/ilejn)). +* Support `SHOW SETTINGS` statement to show parameters in system.settings. `SHOW CHANGED SETTINGS` and `LIKE/ILIKE` clause are also supported. [#18056](https://github.com/ClickHouse/ClickHouse/pull/18056) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Function `position` now supports `POSITION(needle IN haystack)` syntax for SQL compatibility. This closes [#18701](https://github.com/ClickHouse/ClickHouse/issues/18701). ... [#18779](https://github.com/ClickHouse/ClickHouse/pull/18779) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Now we have a new storage setting `max_partitions_to_read` for tables in the MergeTree family. It limits the max number of partitions that can be accessed in one query. A user setting `force_max_partition_limit` is also added to enforce this constraint. [#18712](https://github.com/ClickHouse/ClickHouse/pull/18712) ([Amos Bird](https://github.com/amosbird)). +* Add `query_id` column to `system.part_log` for inserted parts. Closes [#10097](https://github.com/ClickHouse/ClickHouse/issues/10097). [#18644](https://github.com/ClickHouse/ClickHouse/pull/18644) ([flynn](https://github.com/ucasFL)). +* Allow create table as select with columns specification. Example `CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;`. [#18060](https://github.com/ClickHouse/ClickHouse/pull/18060) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `arrayMin`, `arrayMax`, `arrayAvg` aggregation functions. [#18032](https://github.com/ClickHouse/ClickHouse/pull/18032) ([Maksim Kita](https://github.com/kitaisreal)). +* Implemented `ATTACH TABLE name FROM 'path/to/data/' (col1 Type1, ...)` query. It creates a new table with provided structure and attaches table data from the provided directory in `user_files`. [#17903](https://github.com/ClickHouse/ClickHouse/pull/17903) ([tavplubix](https://github.com/tavplubix)). +* Add mutation support for StorageMemory. This closes [#9117](https://github.com/ClickHouse/ClickHouse/issues/9117). [#15127](https://github.com/ClickHouse/ClickHouse/pull/15127) ([flynn](https://github.com/ucasFL)). +* Support syntax `EXISTS DATABASE name` (see the examples below). [#18458](https://github.com/ClickHouse/ClickHouse/pull/18458) ([Du Chuan](https://github.com/spongedu)).
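+
+A hedged sketch combining a few of the new statements described above (each statement mirrors its changelog entry; exact output shapes are assumptions):
+
+```sql
+EXISTS DATABASE default;                               -- returns 1 if the database exists
+SHOW CHANGED SETTINGS LIKE 'max_%';                    -- settings changed from their defaults, filtered by pattern
+SELECT position('world' IN 'hello world');             -- SQL-compatible POSITION(needle IN haystack) syntax
+```
+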
+* Support builtin functions `isIPv4String` and `isIPv6String` like [MySQL](https://github.com/ClickHouse/ClickHouse/compare/master...spongedu:support_is_ipv4?expand=1). [#18349](https://github.com/ClickHouse/ClickHouse/pull/18349) ([Du Chuan](https://github.com/spongedu)). +* Add a new setting `insert_distributed_one_random_shard = 1` to allow insertion into multi-sharded distributed table without any distributed key. [#18294](https://github.com/ClickHouse/ClickHouse/pull/18294) ([Amos Bird](https://github.com/amosbird)). +* Add settings `min_compress_block_size` and `max_compress_block_size` to MergeTreeSettings, which have higher priority than the global settings and take effect when they are set. Closes [#13890](https://github.com/ClickHouse/ClickHouse/issues/13890). [#17867](https://github.com/ClickHouse/ClickHouse/pull/17867) ([flynn](https://github.com/ucasFL)). +* Add support for 64bit roaring bitmaps. [#17858](https://github.com/ClickHouse/ClickHouse/pull/17858) ([Andy Yang](https://github.com/andyyzh)). +* Extended `OPTIMIZE ... DEDUPLICATE` syntax to allow explicit (or implicit with asterisk/column transformers) list of columns to check for duplicates on. ... [#17846](https://github.com/ClickHouse/ClickHouse/pull/17846) ([Vasily Nemkov](https://github.com/Enmk)). +* Added functions `toModifiedJulianDay`, `fromModifiedJulianDay`, `toModifiedJulianDayOrNull`, and `fromModifiedJulianDayOrNull`. These functions convert between Proleptic Gregorian calendar date and Modified Julian Day number. [#17750](https://github.com/ClickHouse/ClickHouse/pull/17750) ([PHO](https://github.com/depressed-pho)). +* Add ability to use custom TLD list: added functions `firstSignificantSubdomainCustom`, `cutToFirstSignificantSubdomainCustom`. [#17748](https://github.com/ClickHouse/ClickHouse/pull/17748) ([Azat Khuzhin](https://github.com/azat)). +* Add support for `PROXYv1` protocol to wrap native TCP interface. Allow quotas to be keyed by proxy-forwarded IP address (applied for `PROXYv1` address and for `X-Forwarded-For` from HTTP interface). This is useful when you provide access to ClickHouse only via trusted proxy (e.g. CloudFlare) but want to account user resources by their original IP addresses. This fixes [#17268](https://github.com/ClickHouse/ClickHouse/issues/17268). [#17707](https://github.com/ClickHouse/ClickHouse/pull/17707) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now clickhouse-client supports opening `EDITOR` to edit commands. `Alt-Shift-E`. [#17665](https://github.com/ClickHouse/ClickHouse/pull/17665) ([Amos Bird](https://github.com/amosbird)). +* Add function `encodeXMLComponent` to escape characters to place string into XML text node or attribute. [#17659](https://github.com/ClickHouse/ClickHouse/pull/17659) ([nauta](https://github.com/nautaa)). +* Introduce `DETACH TABLE/VIEW ... PERMANENTLY` syntax, so that the table does not reappear automatically on restart (only by explicit request). The table can still be attached back using the short syntax ATTACH TABLE. Implements [#5555](https://github.com/ClickHouse/ClickHouse/issues/5555). Fixes [#13850](https://github.com/ClickHouse/ClickHouse/issues/13850). [#17642](https://github.com/ClickHouse/ClickHouse/pull/17642) ([filimonov](https://github.com/filimonov)). +* Add asynchronous metrics on total amount of rows, bytes and parts in MergeTree tables. This fixes [#11714](https://github.com/ClickHouse/ClickHouse/issues/11714).
[#17639](https://github.com/ClickHouse/ClickHouse/pull/17639) ([flynn](https://github.com/ucasFL)). +* Add settings `limit` and `offset` for out-of-SQL pagination ([#16176](https://github.com/ClickHouse/ClickHouse/issues/16176)). They are useful for building APIs. These two settings will affect a SELECT query as if it were wrapped like `select * from (your_original_select_query) t limit xxx offset xxx;`. [#17633](https://github.com/ClickHouse/ClickHouse/pull/17633) ([hexiaoting](https://github.com/hexiaoting)). +* Provide a new aggregator combinator: `-SimpleState` to build `SimpleAggregateFunction` types via query. It's useful for defining MaterializedView of AggregatingMergeTree engine, and will benefit projections too. [#16853](https://github.com/ClickHouse/ClickHouse/pull/16853) ([Amos Bird](https://github.com/amosbird)). +* Added `queries-file` parameter for `clickhouse-client` and `clickhouse-local`. [#15930](https://github.com/ClickHouse/ClickHouse/pull/15930) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `query` parameter for `clickhouse-benchmark`. [#17832](https://github.com/ClickHouse/ClickHouse/pull/17832) ([Maksim Kita](https://github.com/kitaisreal)). +* `EXPLAIN AST` now supports queries other than `SELECT`. [#18136](https://github.com/ClickHouse/ClickHouse/pull/18136) ([taiyang-li](https://github.com/taiyang-li)). + + +#### Experimental Feature + +* Added functions for calculation of minHash and simHash of text n-grams and shingles. They are intended for semi-duplicate search. Also functions `bitHammingDistance` and `tupleHammingDistance` are added. [#7649](https://github.com/ClickHouse/ClickHouse/pull/7649) ([flynn](https://github.com/ucasFL)). +* Add new data type `Map`. See [#1841](https://github.com/ClickHouse/ClickHouse/issues/1841). First version for Map only supports `String` type of key and value. [#15806](https://github.com/ClickHouse/ClickHouse/pull/15806) ([hexiaoting](https://github.com/hexiaoting)). +* Implement alternative SQL parser based on ANTLR4 runtime and generated from EBNF grammar. [#11298](https://github.com/ClickHouse/ClickHouse/pull/11298) ([Ivan](https://github.com/abyss7)). + + +#### Performance Improvement + +* New IP Dictionary implementation with lower memory consumption, improved performance for some cases, and fixed bugs. [#16804](https://github.com/ClickHouse/ClickHouse/pull/16804) ([vdimir](https://github.com/vdimir)). +* Parallel formatting for data export. [#11617](https://github.com/ClickHouse/ClickHouse/pull/11617) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* LDAP integration: Added `verification_cooldown` parameter in LDAP server connection configuration to allow caching of successful "bind" attempts for configurable period of time. [#15988](https://github.com/ClickHouse/ClickHouse/pull/15988) ([Denis Glazachev](https://github.com/traceon)). +* Add `--no-system-table` option for `clickhouse-local` to run without system tables. This avoids initialization of `DateLUT` that may take noticeable amount of time (tens of milliseconds) at startup. [#18899](https://github.com/ClickHouse/ClickHouse/pull/18899) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Replace `PODArray` with `PODArrayWithStackMemory` in `AggregateFunctionWindowFunnelData` to improve `windowFunnel` function performance. [#18817](https://github.com/ClickHouse/ClickHouse/pull/18817) ([flynn](https://github.com/ucasFL)). +* Don't send empty blocks to shards on synchronous INSERT into Distributed table.
This closes [#14571](https://github.com/ClickHouse/ClickHouse/issues/14571). [#18775](https://github.com/ClickHouse/ClickHouse/pull/18775) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Optimized read for StorageMemory. [#18052](https://github.com/ClickHouse/ClickHouse/pull/18052) ([Maksim Kita](https://github.com/kitaisreal)). +* Using Dragonbox algorithm for float to string conversion instead of ryu. This improves performance of float to string conversion significantly. [#17831](https://github.com/ClickHouse/ClickHouse/pull/17831) ([Maksim Kita](https://github.com/kitaisreal)). +* Speedup `IPv6CIDRToRange` implementation. [#17569](https://github.com/ClickHouse/ClickHouse/pull/17569) ([vdimir](https://github.com/vdimir)). +* Add `remerge_sort_lowered_memory_bytes_ratio` setting (if memory usage after remerge is not reduced by this ratio, remerge will be disabled). [#17539](https://github.com/ClickHouse/ClickHouse/pull/17539) ([Azat Khuzhin](https://github.com/azat)). +* Improve performance of AggregatingMergeTree with SimpleAggregateFunction(String) in PK. [#17109](https://github.com/ClickHouse/ClickHouse/pull/17109) ([Azat Khuzhin](https://github.com/azat)). +* Now the `-If` combinator is devirtualized, and `count` is properly vectorized. It is for [this PR](https://github.com/ClickHouse/ClickHouse/pull/17041). [#17043](https://github.com/ClickHouse/ClickHouse/pull/17043) ([Amos Bird](https://github.com/amosbird)). +* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). +* Improved performance of function `repeat`. [#16937](https://github.com/ClickHouse/ClickHouse/pull/16937) ([satanson](https://github.com/satanson)). +* Slightly improved performance of float parsing. [#16809](https://github.com/ClickHouse/ClickHouse/pull/16809) ([Maksim Kita](https://github.com/kitaisreal)). +* Add possibility to skip merged partitions for `OPTIMIZE TABLE ... FINAL` (see the sketch below). [#15939](https://github.com/ClickHouse/ClickHouse/pull/15939) ([Kruglov Pavel](https://github.com/Avogar)). +* Integrate with [fast_float from Daniel Lemire](https://github.com/lemire/fast_float) to parse floating point numbers. [#16787](https://github.com/ClickHouse/ClickHouse/pull/16787) ([Maksim Kita](https://github.com/kitaisreal)). It is not enabled by default, because its performance is still lower than that of the rough float parser in ClickHouse. +* Fix max_distributed_connections (affects `prefer_localhost_replica = 1` and `max_threads != max_distributed_connections`). [#17848](https://github.com/ClickHouse/ClickHouse/pull/17848) ([Azat Khuzhin](https://github.com/azat)). +* Adaptive choice of single/multi part upload when sending data to S3. Single part upload is controlled by a new setting `max_single_part_upload_size`. [#17934](https://github.com/ClickHouse/ClickHouse/pull/17934) ([Pavel Kovalenko](https://github.com/Jokser)). +* Support for async tasks in `PipelineExecutor`. Initial support of async sockets for remote queries. [#17868](https://github.com/ClickHouse/ClickHouse/pull/17868) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow to use `optimize_move_to_prewhere` optimization with compact parts, when sizes of columns are unknown. [#17330](https://github.com/ClickHouse/ClickHouse/pull/17330) ([Anton Popov](https://github.com/CurtizJ)).
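+
+A sketch for the `OPTIMIZE TABLE ... FINAL` entry above; the setting name `optimize_skip_merged_partitions` is an assumption based on the PR, and `my_table` is a placeholder MergeTree table:
+
+```sql
+SET optimize_skip_merged_partitions = 1;  -- assumed name: skip partitions already merged into a single part
+OPTIMIZE TABLE my_table FINAL;
+```
+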
+ + +#### Improvement + +* Avoid deadlock when executing INSERT SELECT into itself from a table with `TinyLog` or `Log` table engines. This closes [#6802](https://github.com/ClickHouse/ClickHouse/issues/6802). This closes [#18691](https://github.com/ClickHouse/ClickHouse/issues/18691). This closes [#16812](https://github.com/ClickHouse/ClickHouse/issues/16812). This closes [#14570](https://github.com/ClickHouse/ClickHouse/issues/14570). [#15260](https://github.com/ClickHouse/ClickHouse/pull/15260) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support `SHOW CREATE VIEW name` syntax like [MySQL](https://dev.mysql.com/doc/refman/5.7/en/show-create-view.html). [#18095](https://github.com/ClickHouse/ClickHouse/pull/18095) ([Du Chuan](https://github.com/spongedu)). +* All queries of type `Decimal * Float` or vice versa are allowed, including aggregate ones (e.g. `SELECT sum(decimal_field * 1.1)` or `SELECT dec_col * float_col`), the result type is Float32 or Float64. [#18145](https://github.com/ClickHouse/ClickHouse/pull/18145) ([Mike](https://github.com/myrrc)). +* Improved minimal Web UI: add history; add sharing support; avoid race condition of different requests; add request in-flight and ready indicators; add favicon; detect Ctrl+Enter if textarea is not in focus. [#17293](https://github.com/ClickHouse/ClickHouse/pull/17293) [#17770](https://github.com/ClickHouse/ClickHouse/pull/17770) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* clickhouse-server didn't send `close` request to ZooKeeper server. [#16837](https://github.com/ClickHouse/ClickHouse/pull/16837) ([alesapin](https://github.com/alesapin)). +* Avoid server abnormal termination in case of too low memory limits (`max_memory_usage = 1` / `max_untracked_memory = 1`). [#17453](https://github.com/ClickHouse/ClickHouse/pull/17453) ([Azat Khuzhin](https://github.com/azat)). +* Fix non-deterministic result of `windowFunnel` function in case of same timestamp for different events. [#18884](https://github.com/ClickHouse/ClickHouse/pull/18884) ([Fuwang Hu](https://github.com/fuwhu)). +* Docker: Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server Docker images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). +* Asynchronous INSERTs to `Distributed` tables: Two new settings (by analogy with the MergeTree family) have been added: `fsync_after_insert` - do fsync for every insert (this will decrease performance of inserts); `fsync_directories` - do fsync for the temporary directory (that is used for async INSERT only) after all operations (writes, renames, etc.). [#18864](https://github.com/ClickHouse/ClickHouse/pull/18864) ([Azat Khuzhin](https://github.com/azat)). +* `SYSTEM KILL` command started to work in Docker. This closes [#18847](https://github.com/ClickHouse/ClickHouse/issues/18847). [#18848](https://github.com/ClickHouse/ClickHouse/pull/18848) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Expand macros in the zk path when executing `FETCH PARTITION`. [#18839](https://github.com/ClickHouse/ClickHouse/pull/18839) ([fastio](https://github.com/fastio)). +* Apply `ALTER TABLE ON CLUSTER MODIFY SETTING ...` to all replicas, because we don't replicate such alter commands. [#18789](https://github.com/ClickHouse/ClickHouse/pull/18789) ([Amos Bird](https://github.com/amosbird)). +* Allow column transformer `EXCEPT` to accept a string as regular expression matcher.
This resolves [#18685](https://github.com/ClickHouse/ClickHouse/issues/18685). [#18699](https://github.com/ClickHouse/ClickHouse/pull/18699) ([Amos Bird](https://github.com/amosbird)). +* Fix SimpleAggregateFunction in SummingMergeTree. Now it works like AggregateFunction. In previous versions values were summed together regardless of the aggregate function. This fixes [#18564](https://github.com/ClickHouse/ClickHouse/issues/18564). [#8052](https://github.com/ClickHouse/ClickHouse/issues/8052). [#18637](https://github.com/ClickHouse/ClickHouse/pull/18637) ([Amos Bird](https://github.com/amosbird)). Another fix of using `SimpleAggregateFunction` in `SummingMergeTree`. This fixes [#18676](https://github.com/ClickHouse/ClickHouse/issues/18676). [#18677](https://github.com/ClickHouse/ClickHouse/pull/18677) ([Amos Bird](https://github.com/amosbird)). +* Fixed assertion error inside allocator in case when last argument of function bar is NaN. Now a simple ClickHouse exception is thrown. This fixes [#17876](https://github.com/ClickHouse/ClickHouse/issues/17876). [#18520](https://github.com/ClickHouse/ClickHouse/pull/18520) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix usability issue: no newline after exception message in some tools. [#18444](https://github.com/ClickHouse/ClickHouse/pull/18444) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add ability to modify primary and partition key column type from `LowCardinality(Type)` to `Type` and vice versa. Also add an ability to modify primary key column type from `EnumX` to `IntX` type. Fixes [#5604](https://github.com/ClickHouse/ClickHouse/issues/5604). [#18362](https://github.com/ClickHouse/ClickHouse/pull/18362) ([alesapin](https://github.com/alesapin)). +* Implement `untuple` field access. [#18133](https://github.com/ClickHouse/ClickHouse/issues/18133). [#18309](https://github.com/ClickHouse/ClickHouse/pull/18309) ([hexiaoting](https://github.com/hexiaoting)). +* Allow to parse Array fields from CSV if it is represented as a string containing array that was serialized as nested CSV. Example: `"[""Hello"", ""world"", ""42"""" TV""]"` will parse as `['Hello', 'world', '42" TV']`. Allow to parse array in CSV in a string without enclosing braces. Example: `"'Hello', 'world', '42"" TV'"` will parse as `['Hello', 'world', '42" TV']`. [#18271](https://github.com/ClickHouse/ClickHouse/pull/18271) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make better adaptive granularity calculation for merge tree wide parts. [#18223](https://github.com/ClickHouse/ClickHouse/pull/18223) ([alesapin](https://github.com/alesapin)). +* Now `clickhouse install` works on Mac. The problem was that there is no procfs on this platform. [#18201](https://github.com/ClickHouse/ClickHouse/pull/18201) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Better hints for `SHOW ...` query syntax. [#18183](https://github.com/ClickHouse/ClickHouse/pull/18183) ([Du Chuan](https://github.com/spongedu)). +* Array aggregation `arrayMin`, `arrayMax`, `arraySum`, `arrayAvg` support for `Int128`, `Int256`, `UInt256`. [#18147](https://github.com/ClickHouse/ClickHouse/pull/18147) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `disk` to Set and Join storage settings (see the sketch below). [#18112](https://github.com/ClickHouse/ClickHouse/pull/18112) ([Grigory Pervakov](https://github.com/GrigoryPervakov)).
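+
+A hedged sketch of the new `disk` storage setting on a Join engine table (the engine arguments, table name and disk name are illustrative):
+
+```sql
+CREATE TABLE join_state (k UInt64, v String)
+ENGINE = Join(ANY, LEFT, k)
+SETTINGS disk = 'default';  -- persist the join state on the named disk
+```
+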
+* Access control: Now table function `merge()` requires current user to have `SELECT` privilege on each table it receives data from. This PR fixes [#16964](https://github.com/ClickHouse/ClickHouse/issues/16964). [#18104](https://github.com/ClickHouse/ClickHouse/pull/18104) [#17983](https://github.com/ClickHouse/ClickHouse/pull/17983) ([Vitaly Baranov](https://github.com/vitlibar)). +* Temporary tables are visible in the system tables `system.tables` and `system.columns` now only in the session where they were created. The internal database `_temporary_and_external_tables` is now hidden in those system tables; temporary tables are shown as tables with empty database with the `is_temporary` flag set instead. [#18014](https://github.com/ClickHouse/ClickHouse/pull/18014) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix clickhouse-client rendering issue when the size of terminal window changes. [#18009](https://github.com/ClickHouse/ClickHouse/pull/18009) ([Amos Bird](https://github.com/amosbird)). +* Decrease log verbosity of the events when the client drops the connection from Warning to Information. [#18005](https://github.com/ClickHouse/ClickHouse/pull/18005) ([filimonov](https://github.com/filimonov)). +* Forcibly remove empty or bad metadata files from filesystem for DiskS3. S3 is an experimental feature. [#17935](https://github.com/ClickHouse/ClickHouse/pull/17935) ([Pavel Kovalenko](https://github.com/Jokser)). +* Access control: `allow_introspection_functions=0` prohibits usage of introspection functions but doesn't prohibit giving grants for them anymore (the grantee will need to set `allow_introspection_functions=1` for himself to be able to use that grant; see the sketch below). Similarly `allow_ddl=0` prohibits usage of DDL commands but doesn't prohibit giving grants for them anymore. [#17908](https://github.com/ClickHouse/ClickHouse/pull/17908) ([Vitaly Baranov](https://github.com/vitlibar)). +* Usability improvement: hints for column names. [#17112](https://github.com/ClickHouse/ClickHouse/issues/17112). [#17857](https://github.com/ClickHouse/ClickHouse/pull/17857) ([fastio](https://github.com/fastio)). +* Add diagnostic information when two merge tables try to read each other's data. [#17854](https://github.com/ClickHouse/ClickHouse/pull/17854) ([徐炘](https://github.com/weeds085490)). +* Allow overriding the timeout value for the script run by the ClickHouse docker image. [#17818](https://github.com/ClickHouse/ClickHouse/pull/17818) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Check system log tables' engine definition grammar to prevent some configuration errors. Note that this grammar check is not semantic, which means mistakes such as non-existent columns / expression functions will not be found until the table is created. [#17739](https://github.com/ClickHouse/ClickHouse/pull/17739) ([Du Chuan](https://github.com/spongedu)). +* Removed exception throwing at `RabbitMQ` table initialization if there was no connection (it will be reconnecting in the background). [#17709](https://github.com/ClickHouse/ClickHouse/pull/17709) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Do not ignore server memory limits during Buffer flush. [#17646](https://github.com/ClickHouse/ClickHouse/pull/17646) ([Azat Khuzhin](https://github.com/azat)). +* Switch to patched version of RocksDB (from ClickHouse-Extras) to fix use-after-free error. [#17643](https://github.com/ClickHouse/ClickHouse/pull/17643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
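+
+For the `allow_introspection_functions` entry above, a sketch of the now-permitted flow (the user name is illustrative):
+
+```sql
+-- Granting no longer requires the granting session to have allow_introspection_functions=1:
+GRANT INTROSPECTION ON *.* TO analyst;
+-- The grantee still has to enable the setting to actually call the functions:
+-- SET allow_introspection_functions = 1;
+```
+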
+* Added an offset to exception message for parallel parsing. This fixes [#17457](https://github.com/ClickHouse/ClickHouse/issues/17457). [#17641](https://github.com/ClickHouse/ClickHouse/pull/17641) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Don't throw "Too many parts" error in the middle of INSERT query. [#17566](https://github.com/ClickHouse/ClickHouse/pull/17566) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow query parameters in UPDATE statement of ALTER query. Fixes [#10976](https://github.com/ClickHouse/ClickHouse/issues/10976). [#17563](https://github.com/ClickHouse/ClickHouse/pull/17563) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Query obfuscator: avoid usage of some SQL keywords for identifier names. [#17526](https://github.com/ClickHouse/ClickHouse/pull/17526) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Export current max ddl entry executed by DDLWorker via server metric. It's useful to check if DDLWorker hangs somewhere. [#17464](https://github.com/ClickHouse/ClickHouse/pull/17464) ([Amos Bird](https://github.com/amosbird)). +* Export asynchronous metrics of all of the server's current threads. It's useful to track down issues like [this](https://github.com/ClickHouse-Extras/poco/pull/28). [#17463](https://github.com/ClickHouse/ClickHouse/pull/17463) ([Amos Bird](https://github.com/amosbird)). +* Include dynamic columns like MATERIALIZED / ALIAS for wildcard query when settings `asterisk_include_materialized_columns` and `asterisk_include_alias_columns` are turned on. [#17462](https://github.com/ClickHouse/ClickHouse/pull/17462) ([Ken Chen](https://github.com/chenziliang)). +* Allow specifying [TTL](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#mergetree-table-ttl) to remove old entries from [system log tables](https://clickhouse.com/docs/en/operations/system-tables/), using a TTL attribute in `config.xml`. [#17438](https://github.com/ClickHouse/ClickHouse/pull/17438) ([Du Chuan](https://github.com/spongedu)). +* Now queries coming to the server via MySQL and PostgreSQL protocols have distinctive interface types (which can be seen in the `interface` column of the table `system.query_log`): `4` for MySQL, and `5` for PostgreSQL, instead of formerly used `1` which is now used for the native protocol only. [#17437](https://github.com/ClickHouse/ClickHouse/pull/17437) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix parsing of SETTINGS clause of the `INSERT ... SELECT ... SETTINGS` query. [#17414](https://github.com/ClickHouse/ClickHouse/pull/17414) ([Azat Khuzhin](https://github.com/azat)). +* Correctly account memory in RadixSort. [#17412](https://github.com/ClickHouse/ClickHouse/pull/17412) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add eof check in `receiveHello` in server to prevent getting `Attempt to read after eof` exception. [#17365](https://github.com/ClickHouse/ClickHouse/pull/17365) ([Kruglov Pavel](https://github.com/Avogar)). +* Avoid possible stack overflow in bigint conversion. Big integers are experimental. [#17269](https://github.com/ClickHouse/ClickHouse/pull/17269) ([flynn](https://github.com/ucasFL)). +* Now `set` indices will work with `GLOBAL IN` (see the sketch below). This fixes [#17232](https://github.com/ClickHouse/ClickHouse/issues/17232), [#5576](https://github.com/ClickHouse/ClickHouse/issues/5576). [#17253](https://github.com/ClickHouse/ClickHouse/pull/17253) ([Amos Bird](https://github.com/amosbird)).
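+
+A sketch for the `set` index entry above (the schema and subquery are illustrative):
+
+```sql
+CREATE TABLE hits
+(
+    user_id UInt64,
+    url String,
+    INDEX user_idx user_id TYPE set(1000) GRANULARITY 4
+)
+ENGINE = MergeTree ORDER BY url;
+
+-- The set skip index is now applied for GLOBAL IN as well:
+SELECT count() FROM hits WHERE user_id GLOBAL IN (SELECT number FROM numbers(100));
+```
+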
+* Add limit for http redirects in request to S3 storage (`s3_max_redirects`; see the sketch below). [#17220](https://github.com/ClickHouse/ClickHouse/pull/17220) ([ianton-ru](https://github.com/ianton-ru)). +* When the `-OrNull` combinator is combined with the `-If`, `-Merge`, `-MergeState` or `-State` combinators, `-OrNull` should be put in front. [#16935](https://github.com/ClickHouse/ClickHouse/pull/16935) ([flynn](https://github.com/ucasFL)). +* Support HTTP proxy and HTTPS S3 endpoint configuration. [#16861](https://github.com/ClickHouse/ClickHouse/pull/16861) ([Pavel Kovalenko](https://github.com/Jokser)). +* Added proper authentication using environment, `~/.aws` and `AssumeRole` for S3 client. [#16856](https://github.com/ClickHouse/ClickHouse/pull/16856) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add more OpenTelemetry spans. Add an example of how to export the span data to Zipkin. [#16535](https://github.com/ClickHouse/ClickHouse/pull/16535) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Cache dictionaries: Completely eliminate callbacks and locks for acquiring them. Keys are not divided into "not found" and "expired", but stored in the same map during query. [#14958](https://github.com/ClickHouse/ClickHouse/pull/14958) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `fsync_part_directory`/`fsync_after_insert`/`in_memory_parts_insert_sync`, which never worked (experimental feature). [#18845](https://github.com/ClickHouse/ClickHouse/pull/18845) ([Azat Khuzhin](https://github.com/azat)). +* Allow using `Atomic` engine for nested database of `MaterializeMySQL` engine. [#14849](https://github.com/ClickHouse/ClickHouse/pull/14849) ([tavplubix](https://github.com/tavplubix)). + + +#### Bug Fix + +* Fix the issue when the server could stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) (Amos Bird, [alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)). +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free bug in `rocksdb` library. [#18862](https://github.com/ClickHouse/ClickHouse/pull/18862) ([sundyli](https://github.com/sundy-li)).
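+
+A quick sketch for the `s3_max_redirects` entry above (the URL, value and structure are placeholders):
+
+```sql
+SET s3_max_redirects = 2;  -- cap the number of HTTP redirects followed by the S3 client
+SELECT count() FROM s3('https://example-bucket.s3.amazonaws.com/data.csv', 'CSV', 'a UInt64, b String');
+```
+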
+* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug in merge tree data writer which could lead to marks with bigger size than the fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). +* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and threw the exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Expected single dictionary argument for function` when using function `ignore` with a `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Join tries to materialize const columns, but our code wants them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)). +* Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). +* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804).
[#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)). +* Fix possible hang at shutdown in `clickhouse-local`. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Queries for external databases (MySQL, ODBC, JDBC) were incorrectly rewritten if there was an expression in form of `x IN table`. This fixes [#9756](https://github.com/ClickHouse/ClickHouse/issues/9756). [#18876](https://github.com/ClickHouse/ClickHouse/pull/18876) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* Fix the issue that asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Fix minor issue with logging. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). +* Fix removing of empty parts in `ReplicatedMergeTree` tables, created with old syntax. Fixes [#18582](https://github.com/ClickHouse/ClickHouse/issues/18582). [#18614](https://github.com/ClickHouse/ClickHouse/pull/18614) ([Anton Popov](https://github.com/CurtizJ)). +* Fix date overflow with out-of-range values: Date values are now strictly limited to "2106-02-07", and dates greater than "2106-02-07" are cast to the value 0. [#18565](https://github.com/ClickHouse/ClickHouse/pull/18565) ([hexiaoting](https://github.com/hexiaoting)). +* Add FixedString data type support for replication from MySQL. Replication from MySQL is an experimental feature. This patch fixes [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450). Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)). +* Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug which could lead to `ALTER` queries hanging after the corresponding mutation was killed. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). +* Proper support for 12AM in `parseDateTimeBestEffort` function. This fixes [#18402](https://github.com/ClickHouse/ClickHouse/issues/18402).
[#18449](https://github.com/ClickHouse/ClickHouse/pull/18449) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Fix the unexpected behaviour of `SHOW TABLES`. [#18431](https://github.com/ClickHouse/ClickHouse/pull/18431) ([fastio](https://github.com/fastio)). +* Fix `-SimpleState` combinator generating incompatible argument and return types. [#18404](https://github.com/ClickHouse/ClickHouse/pull/18404) ([Amos Bird](https://github.com/amosbird)). +* Fix possible race condition in concurrent usage of `Set` or `Join` tables and selects from `system.tables`. [#18385](https://github.com/ClickHouse/ClickHouse/pull/18385) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual IPv4/IPv6 stack; Fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes of the odbc-bridge process; Possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)). +* Access control: `SELECT count() FROM table` now can be executed if the user has access to at least a single column from the table (see the sketch below). This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* Access control: `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)). +* Replication from MySQL (experimental feature). Fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186). Fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372). Fix unique key conversion issue in MaterializeMySQL database engine. [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)). +* Fix inconsistency for queries with both `WITH FILL` and `WITH TIES` [#17466](https://github.com/ClickHouse/ClickHouse/issues/17466). [#18188](https://github.com/ClickHouse/ClickHouse/pull/18188) ([hexiaoting](https://github.com/hexiaoting)).
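+
+For the two access-control entries above, a hedged sketch (the user, database and table names are illustrative):
+
+```sql
+GRANT SELECT(id) ON db.events TO reader;
+-- With access to at least a single column, `reader` can now run:
+SELECT count() FROM db.events;
+-- SELECT ... JOIN additionally requires SELECT privilege on both joined tables.
+```
+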
+* Fix inserting a row with default value in case of parsing error in the last column. Fixes [#17712](https://github.com/ClickHouse/ClickHouse/issues/17712). [#18182](https://github.com/ClickHouse/ClickHouse/pull/18182) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix `Unknown setting profile` error on attempt to set settings profile. [#18167](https://github.com/ClickHouse/ClickHouse/pull/18167) ([tavplubix](https://github.com/tavplubix)). +* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). +* Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix comparison of `DateTime64` and `Date`. Fixes [#13804](https://github.com/ClickHouse/ClickHouse/issues/13804) and [#11222](https://github.com/ClickHouse/ClickHouse/issues/11222). ... [#18050](https://github.com/ClickHouse/ClickHouse/pull/18050) ([Vasily Nemkov](https://github.com/Enmk)). +* Replication from MySQL (experimental feature): Fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187). Fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912). Support converting MySQL prefix index for MaterializeMySQL. [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)). +* When server log rotation was configured using `logger.size` parameter with numeric value larger than 2^32, the logs were not rotated properly. This is fixed. [#17905](https://github.com/ClickHouse/ClickHouse/pull/17905) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Trivial query optimization was producing wrong result if the query contained ARRAY JOIN (such a query is actually non-trivial). [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)). +* Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)). +* WAL (experimental feature): Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). +* Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed possible segfault when there was not enough space while inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)). +* Fixed problem when ClickHouse failed to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Windows: Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)). +* It might be determined incorrectly whether a cluster is circular- (cross-) replicated when executing an `ON CLUSTER` query, due to a race condition when `pool_size` > 1. It's fixed.
[#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([tavplubix](https://github.com/tavplubix)). +* Fix empty `system.stack_trace` table when the server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)). +* Exception `fmt::v7::format_error` could be logged in the background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* When clickhouse-client is used in interactive mode with multiline queries, a single-line comment was erroneously extended till the end of the query (see the sketch below these entries). This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix alter query hang when the corresponding mutation was killed on a different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Fix issue with memory accounting when mark cache size was underestimated by ClickHouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). +* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed high CPU usage in background tasks of *MergeTree tables. [#17416](https://github.com/ClickHouse/ClickHouse/pull/17416) ([tavplubix](https://github.com/tavplubix)). +* Fix possible crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Replication from MySQL (experimental feature): Fixes [#16835](https://github.com/ClickHouse/ClickHouse/issues/16835): fix mismatched header with MySQL `SHOW` statement. [#17366](https://github.com/ClickHouse/ClickHouse/pull/17366) ([Winter Zhang](https://github.com/zhang2014)). +* Fix nondeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). +* Fix possible `Unexpected packet Data received from client` error for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)). +* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246). [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
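Regarding the clickhouse-client single-line-comment entry above: the bug class is a `--` comment that fails to stop at the newline, swallowing the following lines of a multiline query. Below is a hedged sketch of the correct behaviour; it deliberately ignores string literals and is not the client's actual lexer.

```cpp
#include <string>
#include <string_view>

// Strip "--" comments, terminating each at the end of its line so the
// next line of a multiline query is preserved.
std::string stripSingleLineComments(std::string_view query)
{
    std::string out;
    for (size_t i = 0; i < query.size(); ++i)
    {
        if (query[i] == '-' && i + 1 < query.size() && query[i + 1] == '-')
        {
            while (i < query.size() && query[i] != '\n')
                ++i;              // skip to end of line only, not end of query
            if (i < query.size())
                out += '\n';      // keep the line break itself
        }
        else
            out += query[i];
    }
    return out;
}
```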
+* clickhouse-copier: Fix for non-partitioned tables [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). [#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). +* Fixed mutations possibly not working for parts stored on an S3 disk (experimental feature). [#17227](https://github.com/ClickHouse/ClickHouse/pull/17227) ([Pavel Kovalenko](https://github.com/Jokser)). +* Bug fix for function `fuzzBits`, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). +* Fix `optimize_distributed_group_by_sharding_key` for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)). +* Fix queries from `Merge` tables over `Distributed` tables with JOINs. [#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)). +* Fix `ORDER BY` optimization with monotonic functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix incorrect comparison of `DateTime64` types with different scales. Fixes [#16655](https://github.com/ClickHouse/ClickHouse/issues/16655) ... [#16952](https://github.com/ClickHouse/ClickHouse/pull/16952) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)). +* Minor fix in SHOW ACCESS query. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)). +* Fix the behaviour of the `optimize_trivial_count_query` setting with a partition predicate. [#16767](https://github.com/ClickHouse/ClickHouse/pull/16767) ([Azat Khuzhin](https://github.com/azat)). +* Return number of affected rows for INSERT queries via MySQL wire protocol. Previously ClickHouse always returned 0; it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). +* Fix inconsistent behavior caused by `select_sequential_consistency` for optimized trivial count query and system tables. [#16309](https://github.com/ClickHouse/ClickHouse/pull/16309) ([Hao Chen](https://github.com/haoch)). +* Throw an error when the `REPLACE` column transformer operates on a non-existing column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)). +* Throw an exception in case of a non-equi-join `ON` expression in `RIGHT|FULL JOIN`. [#15162](https://github.com/ClickHouse/ClickHouse/pull/15162) ([Artem Zuikov](https://github.com/4ertus2)). + + +#### Build/Testing/Packaging Improvement + +* Add simple integrity check for ClickHouse binary. It allows detecting corruption due to faulty hardware (bit rot on storage media or bit flips in RAM). [#18811](https://github.com/ClickHouse/ClickHouse/pull/18811) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Change `OpenSSL` to `BoringSSL`. It allows avoiding issues with sanitizers. This fixes [#12490](https://github.com/ClickHouse/ClickHouse/issues/12490).
This fixes [#17502](https://github.com/ClickHouse/ClickHouse/issues/17502). This fixes [#12952](https://github.com/ClickHouse/ClickHouse/issues/12952). [#18129](https://github.com/ClickHouse/ClickHouse/pull/18129) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Simplify `SysV` init script. It was not working on Ubuntu 12.04 or older. [#17428](https://github.com/ClickHouse/ClickHouse/pull/17428) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Multiple improvements in `./clickhouse install` script. [#17421](https://github.com/ClickHouse/ClickHouse/pull/17421) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now ClickHouse can pretend to be a fake ZooKeeper. Currently, the storage implementation is just an in-memory hash table, and the server partially supports the ZooKeeper protocol. [#16877](https://github.com/ClickHouse/ClickHouse/pull/16877) ([alesapin](https://github.com/alesapin)). +* Fix dead list watches removal for TestKeeperStorage (a mock for ZooKeeper). [#18065](https://github.com/ClickHouse/ClickHouse/pull/18065) ([alesapin](https://github.com/alesapin)). +* Add `SYSTEM SUSPEND` command for fault injection. It can be used to facilitate failover tests. This closes [#15979](https://github.com/ClickHouse/ClickHouse/issues/15979). [#18850](https://github.com/ClickHouse/ClickHouse/pull/18850) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Generate build id when ClickHouse is linked with `lld`. It appeared that `lld` does not generate it by default on my machine. Build id is used for crash reports and introspection. [#18808](https://github.com/ClickHouse/ClickHouse/pull/18808) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix shellcheck errors in style check. [#18566](https://github.com/ClickHouse/ClickHouse/pull/18566) ([Ilya Yatsishin](https://github.com/qoega)). +* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). +* Fix codespell warnings. Split style checks into separate parts. Update style checks Docker image. [#18463](https://github.com/ClickHouse/ClickHouse/pull/18463) ([Ilya Yatsishin](https://github.com/qoega)). +* Automated check for leftovers of conflict markers in docs. [#18332](https://github.com/ClickHouse/ClickHouse/pull/18332) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Enable Thread Fuzzer for stateless tests flaky check. [#18299](https://github.com/ClickHouse/ClickHouse/pull/18299) ([alesapin](https://github.com/alesapin)). +* Do not use the non-thread-safe function `strerror`. [#18204](https://github.com/ClickHouse/ClickHouse/pull/18204) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Update `anchore/scan-action@main` workflow action (was moved from `master` to `main`). [#18192](https://github.com/ClickHouse/ClickHouse/pull/18192) ([Stig Bakken](https://github.com/stigsb)). +* Now `clickhouse-test` does DROP/CREATE databases with a timeout. [#18098](https://github.com/ClickHouse/ClickHouse/pull/18098) ([alesapin](https://github.com/alesapin)). +* Enable experimental support for Pytest framework for stateless tests. [#17902](https://github.com/ClickHouse/ClickHouse/pull/17902) ([Ivan](https://github.com/abyss7)). +* Now we use the fresh Docker daemon version in integration tests. [#17671](https://github.com/ClickHouse/ClickHouse/pull/17671) ([alesapin](https://github.com/alesapin)). +* Send info about official build, memory, CPU and free disk space to Sentry if it is enabled.
Sentry is an opt-in feature to help ClickHouse developers. This closes [#17279](https://github.com/ClickHouse/ClickHouse/issues/17279). [#17543](https://github.com/ClickHouse/ClickHouse/pull/17543) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* There was an uninitialized variable in the code of clickhouse-copier. [#17363](https://github.com/ClickHouse/ClickHouse/pull/17363) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix [one MSan report](https://clickhouse-test-reports.s3.yandex.net/17309/065cd002578f2e8228f12a2744bd40c970065e0c/stress_test_(memory)/stderr.log) from [#17309](https://github.com/ClickHouse/ClickHouse/issues/17309). [#17344](https://github.com/ClickHouse/ClickHouse/pull/17344) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix for the issue with IPv6 in Arrow Flight library. See [the comments](https://github.com/ClickHouse/ClickHouse/pull/16243#issuecomment-720830294) for details. [#16664](https://github.com/ClickHouse/ClickHouse/pull/16664) ([Zhanna](https://github.com/FawnD2)). +* Add a library that replaces some `libc` functions with traps that will terminate the process. [#16366](https://github.com/ClickHouse/ClickHouse/pull/16366) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Provide diagnostics in server logs in case of stack overflow, and send an error message to clickhouse-client. This closes [#14840](https://github.com/ClickHouse/ClickHouse/issues/14840). [#16346](https://github.com/ClickHouse/ClickHouse/pull/16346) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now we can run almost all stateless functional tests in parallel. [#15236](https://github.com/ClickHouse/ClickHouse/pull/15236) ([alesapin](https://github.com/alesapin)). +* Fix corruption in `librdkafka` snappy decompression (was a problem only for gcc10 builds, but official builds use clang already, so at least recent official releases are not affected). [#18053](https://github.com/ClickHouse/ClickHouse/pull/18053) ([Azat Khuzhin](https://github.com/azat)). +* If the server was terminated by the OOM killer, print a message in the log. [#13516](https://github.com/ClickHouse/ClickHouse/pull/13516) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* PODArray: Avoid call to memcpy with (nullptr, 0) arguments (Fix UBSan report); see the sketch after this list. This fixes [#18525](https://github.com/ClickHouse/ClickHouse/issues/18525). [#18526](https://github.com/ClickHouse/ClickHouse/pull/18526) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Minor improvement for path concatenation of ZooKeeper paths inside DDLWorker. [#17767](https://github.com/ClickHouse/ClickHouse/pull/17767) ([Bharat Nallan](https://github.com/bharatnc)). +* Allow to reload symbols from debug file. This PR also fixes a build-id issue. [#17637](https://github.com/ClickHouse/ClickHouse/pull/17637) ([Amos Bird](https://github.com/amosbird)).
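On the PODArray entry above: per the C standard, passing a null pointer to `memcpy` is undefined behaviour even when the length is zero, which is exactly what UBSan flags. A minimal sketch of the guard (illustrative, not the actual PODArray code):

```cpp
#include <cstddef>
#include <cstring>

// memcpy(nullptr, nullptr, 0) is UB, so skip the call for empty copies.
void copyBytes(void * dst, const void * src, size_t n)
{
    if (n == 0)
        return;
    std::memcpy(dst, src, n);
}
```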
+ + ## [Changelog for 2020](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2020.md) diff --git a/docs/en/whats-new/changelog/index.md b/docs/en/whats-new/changelog/index.md index 08fe7e3b8dc..517ea16f3e7 100644 --- a/docs/en/whats-new/changelog/index.md +++ b/docs/en/whats-new/changelog/index.md @@ -1,7 +1,7 @@ --- toc_folder_title: Changelog toc_priority: 74 -toc_title: '2021' +toc_title: '2022' --- {% include "content/changelog.md" %} diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md index da51d27ded2..5bcb9ce95b1 100644 --- a/docs/ru/operations/monitoring.md +++ b/docs/ru/operations/monitoring.md @@ -37,7 +37,7 @@ ClickHouse collects: You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [graphite](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) section of the ClickHouse configuration file. Before configuring the export of metrics, set up Graphite as described in the [official guide](https://graphite.readthedocs.io/en/latest/install.html). -You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See [prometheus](server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) of the ClickHouse configuration file. Before configuring the export of metrics, set up Prometheus as described in the [official guide](https://prometheus.io/docs/prometheus/latest/installation/). +You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [prometheus](server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) section of the ClickHouse configuration file. Before configuring the export of metrics, set up Prometheus as described in the [official guide](https://prometheus.io/docs/prometheus/latest/installation/). You can also monitor server availability through the HTTP API. Send an `HTTP GET` request to the `/ping` resource. If the server is available, it responds with `200 OK`. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 21fef0e0286..a2d9ebaf68e 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -55,7 +55,12 @@ SELECT * FROM insert_select_testtable └───┴───┴───┘ ``` -In this example, we see that the second row contains columns `a` and `c` filled with the passed values, and `b` filled with the default value. +In this example, we see that the second row contains columns `a` and `c` filled with the passed values, and `b` filled with the default value. You can also use the `DEFAULT` keyword to insert default values: + +``` sql +INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1) ; +``` + If the column list does not include all existing columns, the remaining columns are filled as follows: - Values computed from the `DEFAULT` expressions specified in the table definition.
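The `ContextAccess` changes further below move subscription setup out of the constructor into a separate `initialize()` and capture `weak_from_this()` in the change callback. Two reasons: `shared_from_this()` / `weak_from_this()` only work once a `shared_ptr` already owns the object (never inside the constructor), and a weak capture lets the callback detect that the object is gone instead of dereferencing a dangling `this`. A minimal sketch of the idiom, with illustrative names:

```cpp
#include <functional>
#include <iostream>
#include <memory>

struct Watcher : std::enable_shared_from_this<Watcher>
{
    // Must be called after the object is owned by a shared_ptr,
    // which is why this is not done in the constructor.
    std::function<void()> makeCallback()
    {
        return [weak = weak_from_this()]
        {
            if (auto self = weak.lock())     // still alive: safe to use
                std::cout << "notified\n";
            else
                std::cout << "watcher already destroyed\n";
        };
    }
};

int main()
{
    std::function<void()> cb;
    {
        auto w = std::make_shared<Watcher>();
        cb = w->makeCallback();
        cb();                                // prints "notified"
    }
    cb();                                    // prints "watcher already destroyed"
}
```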
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index b1e1345cf71..a5e4517824d 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1,23 +1,17 @@ #include #include -#include #include #include -#include #include -#include -#include #include #include #include #include -#include #include #include #include "Client.h" #include "Core/Protocol.h" -#include #include #include diff --git a/programs/keeper/keeper_config.xml b/programs/keeper/keeper_config.xml index 8b4d4274e6a..bb80850d01e 100644 --- a/programs/keeper/keeper_config.xml +++ b/programs/keeper/keeper_config.xml @@ -38,7 +38,8 @@ 10000 - 30000 + 10000 + 100000 information diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 67c754495d1..5fc3f9aa967 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -58,6 +58,8 @@ #include #include #include +#include +#include #include #include #include @@ -525,6 +527,7 @@ int Server::main(const std::vector & /*args*/) registerDictionaries(); registerDisks(); registerFormats(); + registerRemoteFileMetadatas(); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); @@ -559,6 +562,21 @@ if (ThreadFuzzer::instance().isEffective()) config().getUInt("thread_pool_queue_size", 10000) ); + + /// Initialize global local cache for remote filesystem. + if (config().has("local_cache_for_remote_fs")) + { + bool enable = config().getBool("local_cache_for_remote_fs.enable", false); + if (enable) + { + String root_dir = config().getString("local_cache_for_remote_fs.root_dir"); + UInt64 limit_size = config().getUInt64("local_cache_for_remote_fs.limit_size"); + UInt64 bytes_read_before_flush + = config().getUInt64("local_cache_for_remote_fs.bytes_read_before_flush", DBMS_DEFAULT_BUFFER_SIZE); + ExternalDataSourceCache::instance().initOnce(global_context, root_dir, limit_size, bytes_read_before_flush); + } + } + Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); std::mutex servers_lock; std::vector servers; diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 9cad53e667b..7868a9088a9 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -75,6 +75,7 @@ public: cache.remove(params); } auto res = std::shared_ptr(new ContextAccess(access_control, params)); + res->initialize(); cache.add(params, res); return res; } diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index b254a59376d..400ee55a35d 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -146,17 +146,23 @@ ContextAccess::ContextAccess(const AccessControl & access_control_, const Params : access_control(&access_control_) , params(params_) { - std::lock_guard lock{mutex}; +} - subscription_for_user_change = access_control->subscribeForChanges( - *params.user_id, [this](const UUID &, const AccessEntityPtr & entity) - { - UserPtr changed_user = entity ? typeid_cast(entity) : nullptr; - std::lock_guard lock2{mutex}; - setUser(changed_user); - }); - setUser(access_control->read(*params.user_id)); +void ContextAccess::initialize() +{ + std::lock_guard lock{mutex}; + subscription_for_user_change = access_control->subscribeForChanges( + *params.user_id, [weak_ptr = weak_from_this()](const UUID &, const AccessEntityPtr & entity) + { + auto ptr = weak_ptr.lock(); + if (!ptr) + return; + UserPtr changed_user = entity ? 
typeid_cast(entity) : nullptr; + std::lock_guard lock2{ptr->mutex}; + ptr->setUser(changed_user); + }); + setUser(access_control->read(*params.user_id)); } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index d7cc999e95f..02cef3d0935 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -63,7 +63,7 @@ struct ContextAccessParams }; -class ContextAccess +class ContextAccess : public std::enable_shared_from_this { public: using Params = ContextAccessParams; @@ -161,6 +161,7 @@ private: ContextAccess() {} ContextAccess(const AccessControl & access_control_, const Params & params_); + void initialize(); void setUser(const UserPtr & user_) const; void setRolesInfo(const std::shared_ptr & roles_info_) const; void setSettingsAndConstraints() const; diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 77c3281e5ab..cac5dacef9b 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -353,6 +353,9 @@ bool LDAPAccessStorage::areLDAPCredentialsValidNoLock(const User & user, const C if (credentials.getUserName() != user.getName()) return false; + if (typeid_cast(&credentials)) + return true; + if (const auto * basic_credentials = dynamic_cast(&credentials)) return external_authenticators.checkLDAPCredentials(ldap_server_name, *basic_credentials, &role_search_params, &role_search_results); @@ -478,53 +481,53 @@ std::optional LDAPAccessStorage::authenticateImpl( const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, - bool /* throw_if_user_not_exists */) const + bool throw_if_user_not_exists) const { std::scoped_lock lock(mutex); - LDAPClient::SearchResultsList external_roles; auto id = memory_storage.find(credentials.getUserName()); - if (id) + UserPtr user = id ? memory_storage.read(*id) : nullptr; + + std::shared_ptr new_user; + if (!user) { - auto user = memory_storage.read(*id); + // User does not exist, so we create one, and will add it if authentication is successful. + new_user = std::make_shared(); + new_user->setName(credentials.getUserName()); + new_user->auth_data = AuthenticationData(AuthenticationType::LDAP); + new_user->auth_data.setLDAPServerName(ldap_server_name); + user = new_user; + } - if (!isAddressAllowed(*user, address)) - throwAddressNotAllowed(address); + if (!isAddressAllowed(*user, address)) + throwAddressNotAllowed(address); - if (typeid_cast(&credentials)) - return id; + LDAPClient::SearchResultsList external_roles; + if (!areLDAPCredentialsValidNoLock(*user, credentials, external_authenticators, external_roles)) + { + // We don't know why the authentication has just failed: + // either there is no such user in LDAP or the password is not correct. + // We treat this situation as if there is no such user because we don't want to block + // other storages following this LDAPAccessStorage from trying to authenticate on their own. + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName()); + else + return {}; + } - if (!areLDAPCredentialsValidNoLock(*user, credentials, external_authenticators, external_roles)) - throwInvalidCredentials(); + if (new_user) + { + // TODO: if these were AlwaysAllowCredentials, then mapped external roles are not available here, + // since without a password we can't authenticate and retrieve roles from the LDAP server. - // Just in case external_roles are changed. This will be no-op if they are not. 
- updateAssignedRolesNoLock(*id, user->getName(), external_roles); - - return id; + assignRolesNoLock(*new_user, external_roles); + id = memory_storage.insert(new_user); } else { - // User does not exist, so we create one, and will add it if authentication is successful. - auto user = std::make_shared(); - user->setName(credentials.getUserName()); - user->auth_data = AuthenticationData(AuthenticationType::LDAP); - user->auth_data.setLDAPServerName(ldap_server_name); - - if (!isAddressAllowed(*user, address)) - throwAddressNotAllowed(address); - - if (typeid_cast(&credentials)) - { - // TODO: mapped external roles are not available here. Without a password we can't authenticate and retrieve roles from LDAP server. - assignRolesNoLock(*user, external_roles); - return memory_storage.insert(user); - } - - if (!areLDAPCredentialsValidNoLock(*user, credentials, external_authenticators, external_roles)) - throwInvalidCredentials(); - - assignRolesNoLock(*user, external_roles); - - return memory_storage.insert(user); + // Just in case external_roles are changed. This will be no-op if they are not. + updateAssignedRolesNoLock(*id, user->getName(), external_roles); } + + return id; } } diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 49d01074f6a..1b1cac0800f 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -69,7 +69,7 @@ namespace std::recursive_mutex ldap_global_mutex; - auto escapeForLDAP(const String & src) + auto escapeForDN(const String & src) { String dest; dest.reserve(src.size() * 2); @@ -96,6 +96,39 @@ namespace return dest; } + auto escapeForFilter(const String & src) + { + String dest; + dest.reserve(src.size() * 3); + + for (auto ch : src) + { + switch (ch) + { + case '*': + dest += "\\2A"; + break; + case '(': + dest += "\\28"; + break; + case ')': + dest += "\\29"; + break; + case '\\': + dest += "\\5C"; + break; + case '\0': + dest += "\\00"; + break; + default: + dest += ch; + break; + } + } + + return dest; + } + auto replacePlaceholders(const String & src, const std::vector> & pairs) { String dest = src; @@ -160,7 +193,7 @@ void LDAPClient::diag(const int rc, String text) } } -void LDAPClient::openConnection() +bool LDAPClient::openConnection() { std::scoped_lock lock(ldap_global_mutex); @@ -294,7 +327,7 @@ void LDAPClient::openConnection() if (params.enable_tls == LDAPClient::Params::TLSEnable::YES_STARTTLS) diag(ldap_start_tls_s(handle, nullptr, nullptr)); - final_user_name = escapeForLDAP(params.user); + final_user_name = escapeForDN(params.user); final_bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", final_user_name} }); final_user_dn = final_bind_dn; // The default value... may be updated right after a successful bind. @@ -306,7 +339,15 @@ void LDAPClient::openConnection() cred.bv_val = const_cast(params.password.c_str()); cred.bv_len = params.password.size(); - diag(ldap_sasl_bind_s(handle, final_bind_dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr)); + { + const auto rc = ldap_sasl_bind_s(handle, final_bind_dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr); + + // Handle invalid credentials gracefully. + if (rc == LDAP_INVALID_CREDENTIALS) + return false; + + diag(rc); + } // Once bound, run the user DN search query and update the default value, if asked. 
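The new `escapeForFilter` above applies the standard LDAP search-filter escaping (RFC 4515 hex escapes) to values substituted into `{user_name}` and the other placeholders; without it, a crafted user name could rewrite the filter (LDAP filter injection). A standalone sketch reproducing the same escape table, for illustration only:

```cpp
#include <iostream>
#include <string>

// Same escape table as the escapeForFilter function in the diff above.
static std::string escapeForFilter(const std::string & src)
{
    std::string dest;
    dest.reserve(src.size() * 3);
    for (char ch : src)
    {
        switch (ch)
        {
            case '*':  dest += "\\2A"; break;
            case '(':  dest += "\\28"; break;
            case ')':  dest += "\\29"; break;
            case '\\': dest += "\\5C"; break;
            case '\0': dest += "\\00"; break;
            default:   dest += ch;     break;
        }
    }
    return dest;
}

int main()
{
    // Unescaped, "*)(uid=*" injected into "(cn=...)" would match everything.
    std::cout << "(cn=" << escapeForFilter("*)(uid=*") << ")\n";
    // prints: (cn=\2A\29\28uid=\2A)
}
```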
if (params.user_dn_detection) @@ -322,7 +363,7 @@ void LDAPClient::openConnection() final_user_dn = *user_dn_search_results.begin(); } - break; + return true; } default: @@ -366,10 +407,10 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) }); const auto final_search_filter = replacePlaceholders(search_params.search_filter, { - {"{user_name}", final_user_name}, - {"{bind_dn}", final_bind_dn}, - {"{user_dn}", final_user_dn}, - {"{base_dn}", final_base_dn} + {"{user_name}", escapeForFilter(final_user_name)}, + {"{bind_dn}", escapeForFilter(final_bind_dn)}, + {"{user_dn}", escapeForFilter(final_user_dn)}, + {"{base_dn}", escapeForFilter(final_base_dn)} }); char * attrs[] = { const_cast(search_params.attribute.c_str()), nullptr }; @@ -541,8 +582,9 @@ bool LDAPSimpleAuthClient::authenticate(const RoleSearchParamsList * role_search SCOPE_EXIT({ closeConnection(); }); - // Will throw on any error, including invalid credentials. - openConnection(); + // Will return false on invalid credentials, will throw on any other error. + if (!openConnection()) + return false; // While connected, run search queries and save the results, if asked. if (role_search_params) @@ -574,7 +616,7 @@ void LDAPClient::diag(const int, String) throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } -void LDAPClient::openConnection() +bool LDAPClient::openConnection() { throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index 3d6ee9c8dc9..05f71993699 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -134,7 +134,7 @@ public: protected: MAYBE_NORETURN void diag(const int rc, String text = ""); - MAYBE_NORETURN void openConnection(); + MAYBE_NORETURN bool openConnection(); void closeConnection() noexcept; SearchResults search(const SearchParams & search_params); diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index 11540dd1b77..dd77f47971d 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -452,9 +452,11 @@ void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock std::optional MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists) const { auto storages = getStoragesInternal(); - for (const auto & storage : *storages) + for (size_t i = 0; i != storages->size(); ++i) { - auto id = storage->authenticate(credentials, address, external_authenticators, /* throw_if_user_not_exists = */ false); + const auto & storage = (*storages)[i]; + bool is_last_storage = (i == storages->size() - 1); + auto id = storage->authenticate(credentials, address, external_authenticators, throw_if_user_not_exists && is_last_storage); if (id) { std::lock_guard lock{mutex}; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c9e9f736e0d..62767e02a64 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -115,6 +115,11 @@ if (USE_HDFS) add_headers_and_sources(dbms Disks/HDFS) endif() +add_headers_and_sources(dbms Storages/Cache) +if (USE_HIVE) + add_headers_and_sources(dbms Storages/Hive) +endif() + if(USE_FILELOG) add_headers_and_sources(dbms Storages/FileLog) endif() @@ -448,6 +453,12 @@ if (USE_HDFS) dbms_target_include_directories (SYSTEM BEFORE PUBLIC 
${HDFS3_INCLUDE_DIR}) endif() +if (USE_HIVE) + dbms_target_link_libraries(PRIVATE hivemetastore) + dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore) +endif() + + if (USE_AWS_S3) target_link_libraries (clickhouse_common_io PUBLIC ${AWS_S3_LIBRARY}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_CORE_INCLUDE_DIR}) @@ -469,6 +480,11 @@ if (USE_BROTLI) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) endif() +if (USE_SNAPPY) + target_link_libraries (clickhouse_common_io PUBLIC ${SNAPPY_LIBRARY}) + target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${SNAPPY_INCLUDE_DIR}) +endif() + if (USE_AMQPCPP) dbms_target_link_libraries(PUBLIC ${AMQPCPP_LIBRARY}) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${AMQPCPP_INCLUDE_DIR}) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 0938a9cfee5..eb00ee349ee 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -463,12 +463,13 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query) /// The query can specify output format or output file. if (const auto * query_with_output = dynamic_cast(parsed_query.get())) { + String out_file; if (query_with_output->out_file) { select_into_file = true; const auto & out_file_node = query_with_output->out_file->as(); - const auto & out_file = out_file_node.value.safeGet(); + out_file = out_file_node.value.safeGet(); std::string compression_method; if (query_with_output->compression) @@ -494,6 +495,12 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query) const auto & id = query_with_output->format->as(); current_format = id.name(); } + else if (query_with_output->out_file) + { + const auto & format_name = FormatFactory::instance().getFormatFromFileName(out_file); + if (!format_name.empty()) + current_format = format_name; + } } if (has_vertical_output_suffix) @@ -1008,11 +1015,15 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des compression_method = compression_method_node.value.safeGet(); } + String current_format = parsed_insert_query->format; + if (current_format.empty()) + current_format = FormatFactory::instance().getFormatFromFileName(in_file, true); + /// Create temporary storage file, to support globs and parallel reading StorageFile::CommonArguments args{ WithContext(global_context), parsed_insert_query->table_id, - parsed_insert_query->format, + current_format, getFormatSettings(global_context), compression_method, columns_description_for_query, diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 8ad853950b2..528c38f9b76 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -214,6 +214,12 @@ bool LocalConnection::poll(size_t) if (next_packet_type) return true; + if (state->exception) + { + next_packet_type = Protocol::Server::Exception; + return true; + } + if (!state->is_finished) { if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index 09a3dbad242..f4b7eeb771c 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -3,9 +3,11 @@ #include #include #include +#include #include #include + namespace ProfileEvents { extern const Event FunctionExecute; @@ -59,6 +61,40 @@ ColumnPtr 
ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } +void ColumnFunction::insertFrom(const IColumn & src, size_t n) +{ + const ColumnFunction & src_func = assert_cast(src); + + size_t num_captured_columns = captured_columns.size(); + assert(num_captured_columns == src_func.captured_columns.size()); + + for (size_t i = 0; i < num_captured_columns; ++i) + { + auto mut_column = IColumn::mutate(std::move(captured_columns[i].column)); + mut_column->insertFrom(*src_func.captured_columns[i].column, n); + captured_columns[i].column = std::move(mut_column); + } + + ++size_; +} + +void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) +{ + const ColumnFunction & src_func = assert_cast(src); + + size_t num_captured_columns = captured_columns.size(); + assert(num_captured_columns == src_func.captured_columns.size()); + + for (size_t i = 0; i < num_captured_columns; ++i) + { + auto mut_column = IColumn::mutate(std::move(captured_columns[i].column)); + mut_column->insertRangeFrom(*src_func.captured_columns[i].column, start, length); + captured_columns[i].column = std::move(mut_column); + } + + size_ += length; +} + ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint) const { if (size_ != filt.size()) diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 2592dc01f98..5b410a28ddf 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -88,10 +88,8 @@ public: throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void insertRangeFrom(const IColumn &, size_t, size_t) override - { - throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } + void insertFrom(const IColumn & src, size_t n) override; + void insertRangeFrom(const IColumn &, size_t start, size_t length) override; void insertData(const char *, size_t) override { diff --git a/src/Common/CombinedCardinalityEstimator.h b/src/Common/CombinedCardinalityEstimator.h index 55afb028247..8cf35436840 100644 --- a/src/Common/CombinedCardinalityEstimator.h +++ b/src/Common/CombinedCardinalityEstimator.h @@ -323,7 +323,7 @@ private: UInt64 address = 0; }; static const UInt64 mask = 0xFFFFFFFFFFFFFFFC; - static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; + static const UInt32 medium_set_size_max = 1ULL << medium_set_power2_max; }; } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ef2be3b2164..a22db080374 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -606,6 +606,9 @@ M(635, CANNOT_POLL) \ M(636, CANNOT_EXTRACT_TABLE_STRUCTURE) \ M(637, INVALID_TABLE_OVERRIDE) \ + M(638, SNAPPY_UNCOMPRESS_FAILED) \ + M(639, SNAPPY_COMPRESS_FAILED) \ + M(640, NO_HIVEMETASTORE) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index d30271d65db..7d704e4bdc7 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -280,7 +280,7 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; + n[0] &= -1ULL >> s; } else { diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index 93bbcb2835d..871becc86a4 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h 
@@ -114,7 +114,7 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; + n[0] &= -1ULL >> s; } else { diff --git a/src/Common/LRUResourceCache.h b/src/Common/LRUResourceCache.h index e1a28e7ab60..afc2ba5baf8 100644 --- a/src/Common/LRUResourceCache.h +++ b/src/Common/LRUResourceCache.h @@ -12,9 +12,15 @@ namespace DB { template -struct TrivailLRUResourceCacheWeightFunction +struct TrivialLRUResourceCacheWeightFunction { - size_t operator()(const T &) const { return 1; } + size_t operator()(const T &) const noexcept { return 1; } +}; + +template +struct TrivialLRUResourceCacheReleaseFunction +{ + void operator()(std::shared_ptr) noexcept { } }; /** @@ -24,9 +30,11 @@ struct TrivailLRUResourceCacheWeightFunction * * Warning (!): This implementation is in development, not to be used. */ -template , + typename WeightFunction = TrivialLRUResourceCacheWeightFunction, + typename ReleaseFunction = TrivialLRUResourceCacheReleaseFunction, typename HashFunction = std::hash> class LRUResourceCache { @@ -38,8 +46,7 @@ public: class MappedHolder { public: - MappedHolder(LRUResourceCache * cache_, const Key & key_, MappedPtr value_) - : cache(cache_), key(key_), val(value_) {} + MappedHolder(LRUResourceCache * cache_, const Key & key_, MappedPtr value_) : cache(cache_), key(key_), val(value_) { } ~MappedHolder() { cache->release(key); } @@ -54,7 +61,9 @@ public: using MappedHolderPtr = std::unique_ptr; explicit LRUResourceCache(size_t max_weight_, size_t max_element_size_ = 0) - : max_weight(max_weight_), max_element_size(max_element_size_) {} + : max_weight(max_weight_), max_element_size(max_element_size_) + { + } MappedHolderPtr get(const Key & key) { @@ -86,6 +95,7 @@ public: { queue.erase(cell.queue_iterator); current_weight -= cell.weight; + release_function(cell.value); cells.erase(it); } else @@ -198,6 +208,7 @@ private: friend struct InsertTokenHolder; InsertTokenById insert_tokens; WeightFunction weight_function; + ReleaseFunction release_function; std::atomic hits{0}; std::atomic misses{0}; std::atomic evict_count{0}; @@ -305,6 +316,7 @@ private: { queue.erase(cell.queue_iterator); current_weight -= cell.weight; + release_function(cell.value); cells.erase(it); } } @@ -330,12 +342,11 @@ private: // key mustn't be in the cache Cell * set(const Key & insert_key, MappedPtr value) { - auto weight = value ? weight_function(*value) : 0; - auto queue_size = cells.size() + 1; - auto loss_weight = 0; - + size_t weight = value ? 
weight_function(*value) : 0; + size_t queue_size = cells.size() + 1; + size_t loss_weight = 0; auto is_overflow = [&] { - return current_weight + weight - loss_weight > max_weight || (max_element_size != 0 && queue_size > max_element_size); + return current_weight + weight > max_weight + loss_weight || (max_element_size != 0 && queue_size > max_element_size); }; auto key_it = queue.begin(); @@ -356,7 +367,7 @@ private: if (cell.reference_count == 0) { loss_weight += cell.weight; - queue_size -= 1; + queue_size--; to_release_keys.insert(key); } @@ -376,6 +387,7 @@ private: { auto & cell = cells[key]; queue.erase(cell.queue_iterator); + release_function(cell.value); cells.erase(key); ++evict_count; } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 878930f58d9..ea6c782ebb4 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -277,6 +277,8 @@ \ M(AsynchronousReadWaitMicroseconds, "Time spent in waiting for asynchronous reads.") \ \ + M(ExternalDataSourceLocalCacheReadBytes, "Bytes read from local cache buffer in RemoteReadBufferCache")\ + \ M(MainConfigLoads, "Number of times the main configuration was reloaded.") \ namespace ProfileEvents diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp index f02001e338a..95baf40f2c0 100644 --- a/src/Common/Throttler.cpp +++ b/src/Common/Throttler.cpp @@ -23,7 +23,7 @@ static constexpr auto NS = 1000000000UL; /// Tracking window. Actually the size is not really important. We just want to avoid /// throttles when there are no actions for a long period time. -static const double window_ns = 1UL * NS; +static const double window_ns = 1ULL * NS; void Throttler::add(size_t amount) { diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index ed7afd83628..1ed2c442f6c 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -39,6 +39,7 @@ std::string toString(OpNum op_num); OpNum getOpNum(int32_t raw_op_num); static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION = 0; +static constexpr int32_t KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT = 42; static constexpr int32_t CLIENT_HANDSHAKE_LENGTH = 44; static constexpr int32_t CLIENT_HANDSHAKE_LENGTH_WITH_READONLY = 45; static constexpr int32_t SERVER_HANDSHAKE_LENGTH = 36; @@ -48,6 +49,8 @@ static constexpr int32_t PASSWORD_LENGTH = 16; /// but it can be raised up, so we have a slightly larger limit on our side. static constexpr int32_t MAX_STRING_OR_ARRAY_SIZE = 1 << 28; /// 256 MiB static constexpr int32_t DEFAULT_SESSION_TIMEOUT_MS = 30000; +static constexpr int32_t DEFAULT_MIN_SESSION_TIMEOUT_MS = 10000; +static constexpr int32_t DEFAULT_MAX_SESSION_TIMEOUT_MS = 100000; static constexpr int32_t DEFAULT_OPERATION_TIMEOUT_MS = 10000; } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 09b2548f2f4..f2603cc267f 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -489,7 +489,15 @@ void ZooKeeper::receiveHandshake() read(protocol_version_read); if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION) - throw Exception("Unexpected protocol version: " + DB::toString(protocol_version_read), Error::ZMARSHALLINGERROR); + { + /// Special way to tell a client that server is not ready to serve it. + /// It's better for faster failover than just connection drop. + /// Implemented in clickhouse-keeper. 
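One detail in the `LRUResourceCache::set` hunk above: the overflow check changed from `current_weight + weight - loss_weight > max_weight` to `current_weight + weight > max_weight + loss_weight`. With unsigned `size_t` arithmetic the subtraction can wrap around when `loss_weight` exceeds the left-hand sum, making the check spuriously true; keeping everything additive avoids the wrap. A self-contained illustration (not the cache code itself):

```cpp
#include <cassert>
#include <cstddef>

bool isOverflowWrapping(size_t current, size_t added, size_t freed, size_t max)
{
    return current + added - freed > max;   // wraps if freed > current + added
}

bool isOverflowSafe(size_t current, size_t added, size_t freed, size_t max)
{
    return current + added > max + freed;   // no subtraction, no wrap
}

int main()
{
    // freed (3) exceeds current + added (2): the naive form wraps to a huge value.
    assert(isOverflowWrapping(1, 1, 3, 100) == true);   // spurious overflow
    assert(isOverflowSafe(1, 1, 3, 100) == false);      // correct answer
}
```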
+ if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT) + throw Exception("Keeper server rejected the connection during the handshake. Possibly it's overloaded, doesn't see leader or stale", Error::ZCONNECTIONLOSS); + else + throw Exception("Unexpected protocol version: " + DB::toString(protocol_version_read), Error::ZMARSHALLINGERROR); + } read(timeout); if (timeout != session_timeout.totalMilliseconds()) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index c53abd17bc2..28506a94581 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -20,3 +20,5 @@ #cmakedefine01 USE_YAML_CPP #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 USE_BZIP2 +#cmakedefine01 USE_SNAPPY +#cmakedefine01 USE_HIVE diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 020f3cc4446..be91101ef40 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -80,3 +80,6 @@ target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) add_executable (executable_udf executable_udf.cpp) target_link_libraries (executable_udf PRIVATE dbms) + +add_executable(hive_metastore_client hive_metastore_client.cpp) +target_link_libraries (hive_metastore_client PUBLIC hivemetastore ${THRIFT_LIBRARY}) diff --git a/src/Common/examples/hive_metastore_client.cpp b/src/Common/examples/hive_metastore_client.cpp new file mode 100644 index 00000000000..5a48f2829bf --- /dev/null +++ b/src/Common/examples/hive_metastore_client.cpp @@ -0,0 +1,43 @@ +#include + +#include +#include +#include +#include + + +using namespace std; +using namespace apache::thrift; +using namespace apache::thrift::protocol; +using namespace apache::thrift::transport; +using namespace Apache::Hadoop::Hive; + +int main() +{ + std::shared_ptr socket(new TSocket("localhost", 9083)); + std::shared_ptr transport(new TBufferedTransport(socket)); + std::shared_ptr protocol(new TBinaryProtocol(transport)); + ThriftHiveMetastoreClient client(protocol); + + try + { + transport->open(); + + Table table; + client.get_table(table, "default", "persons"); + table.printTo(std::cout); + + vector partitions; + client.get_partitions(partitions, "default", "persons", 0); + for (const auto & part : partitions) + { + part.printTo(std::cout); + } + + transport->close(); + } + catch (TException & tx) + { + cout << "ERROR: " << tx.what() << endl; + } +} diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 22a6bd1d941..b93420133fa 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -98,6 +98,8 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const writeText("max_requests_batch_size=", buf); write_int(coordination_settings->max_requests_batch_size); + writeText("min_session_timeout_ms=", buf); + write_int(uint64_t(coordination_settings->min_session_timeout_ms)); writeText("session_timeout_ms=", buf); write_int(uint64_t(coordination_settings->session_timeout_ms)); writeText("operation_timeout_ms=", buf); diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 5546551cf3a..38a04043b38 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -19,7 +19,8 @@ struct Settings; #define LIST_OF_COORDINATION_SETTINGS(M) \ - M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_SESSION_TIMEOUT_MS, "Default client session timeout", 0) \ + M(Milliseconds, 
min_session_timeout_ms, Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS, "Min client session timeout", 0) \ + M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS, "Max client session timeout", 0) \ M(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \ M(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \ M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \ diff --git a/src/Coordination/KeeperConnectionStats.cpp b/src/Coordination/KeeperConnectionStats.cpp index f3938b395b5..1e9f2c051a7 100644 --- a/src/Coordination/KeeperConnectionStats.cpp +++ b/src/Coordination/KeeperConnectionStats.cpp @@ -74,6 +74,7 @@ void KeeperConnectionStats::resetLatency() count = 0; max_latency = 0; min_latency = 0; + last_latency = 0; } void KeeperConnectionStats::resetRequestCounters() diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 4f71274291b..bd13a70252e 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -168,7 +168,7 @@ void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::st auto max_session_id = deserializeSessionAndTimeout(storage, reader); LOG_INFO(log, "Sessions and timeouts deserialized"); - storage.session_id_counter = max_session_id; + storage.session_id_counter = max_session_id + 1; /// session_id_counter pointer to next slot deserializeACLMap(storage, reader); LOG_INFO(log, "ACLs deserialized"); diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 85eb6264220..07db1a1fafa 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -38,14 +38,14 @@ static ReturnType onError(const std::string & message [[maybe_unused]], int code template static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, const ColumnWithTypeAndName & expected, - const std::string & context_description, bool allow_materialize, int code) + std::string_view context_description, bool allow_materialize, int code) { if (actual.name != expected.name) - return onError("Block structure mismatch in " + context_description + " stream: different names of columns:\n" + return onError("Block structure mismatch in " + std::string(context_description) + " stream: different names of columns:\n" + actual.dumpStructure() + "\n" + expected.dumpStructure(), code); if (!actual.type->equals(*expected.type)) - return onError("Block structure mismatch in " + context_description + " stream: different types:\n" + return onError("Block structure mismatch in " + std::string(context_description) + " stream: different types:\n" + actual.dumpStructure() + "\n" + expected.dumpStructure(), code); if (!actual.column || !expected.column) @@ -66,7 +66,7 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con } if (actual_column->getName() != expected.column->getName()) - return onError("Block structure mismatch in " + context_description + " stream: different columns:\n" + return onError("Block structure mismatch in " + std::string(context_description) + " stream: different columns:\n" + actual.dumpStructure() + "\n" + expected.dumpStructure(), code); if (isColumnConst(*actual.column) && isColumnConst(*expected.column)) @@ -75,7 +75,7 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con Field expected_value = 
assert_cast(*expected.column).getField(); if (actual_value != expected_value) - return onError("Block structure mismatch in " + context_description + " stream: different values of constants, actual: " + return onError("Block structure mismatch in " + std::string(context_description) + " stream: different values of constants, actual: " + applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value), code); } @@ -85,11 +85,11 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con template -static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description, bool allow_materialize) +static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, std::string_view context_description, bool allow_materialize) { size_t columns = rhs.columns(); if (lhs.columns() != columns) - return onError("Block structure mismatch in " + context_description + " stream: different number of columns:\n" + return onError("Block structure mismatch in " + std::string(context_description) + " stream: different number of columns:\n" + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR); for (size_t i = 0; i < columns; ++i) @@ -604,11 +604,11 @@ Names Block::getDataTypeNames() const bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs) { - return checkBlockStructure(lhs, rhs, {}, false); + return checkBlockStructure(lhs, rhs, "", false); } -void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description) +void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, std::string_view context_description) { checkBlockStructure(lhs, rhs, context_description, false); } @@ -616,11 +616,11 @@ void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const bool isCompatibleHeader(const Block & actual, const Block & desired) { - return checkBlockStructure(actual, desired, {}, true); + return checkBlockStructure(actual, desired, "", true); } -void assertCompatibleHeader(const Block & actual, const Block & desired, const std::string & context_description) +void assertCompatibleHeader(const Block & actual, const Block & desired, std::string_view context_description) { checkBlockStructure(actual, desired, context_description, true); } diff --git a/src/Core/Block.h b/src/Core/Block.h index cad29dea7e6..efa5ce7c326 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -182,13 +182,13 @@ using ExtraBlockPtr = std::shared_ptr; bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs); /// Throw exception when blocks are different. -void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description); +void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, std::string_view context_description); /// Actual header is compatible to desired if block have equal structure except constants. /// It is allowed when column from actual header is constant, but in desired is not. /// If both columns are constant, it is checked that they have the same value. 
bool isCompatibleHeader(const Block & actual, const Block & desired); -void assertCompatibleHeader(const Block & actual, const Block & desired, const std::string & context_description); +void assertCompatibleHeader(const Block & actual, const Block & desired, std::string_view context_description); /// Calculate difference in structure of blocks and write description into output strings. NOTE It doesn't compare values of constant columns. void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff); diff --git a/src/Core/DecimalComparison.h b/src/Core/DecimalComparison.h index 0398ec5cbac..b8498b6c84b 100644 --- a/src/Core/DecimalComparison.h +++ b/src/Core/DecimalComparison.h @@ -52,7 +52,6 @@ struct DecCompareInt using TypeB = Type; }; -/// template typename Operation, bool _check_overflow = true, bool _actual = is_decimal || is_decimal> class DecimalComparison diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6e53fa4342c..d55be808aa8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -544,6 +544,7 @@ class IColumn; \ M(Bool, force_remove_data_recursively_on_drop, false, "Recursively remove data on DROP query. Avoids 'Directory not empty' error, but may silently remove detached data", 0) \ M(Bool, check_table_dependencies, true, "Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies", 0) \ + M(Bool, use_local_cache_for_remote_storage, true, "Use local cache for remote storage like HDFS or S3, it's used for remote table engine only", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ @@ -596,6 +597,12 @@ class IColumn; M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ + M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ + M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \ + M(Bool, input_format_arrow_allow_missing_columns, false, "Allow missing columns while reading Arrow input formats", 0) \ + M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \ + M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \ + M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \ M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. 
Used for automatic schema inference from data.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ \ diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h index 9f026e06127..5843086248c 100644 --- a/src/DataTypes/DataTypesNumber.h +++ b/src/DataTypes/DataTypesNumber.h @@ -12,6 +12,7 @@ namespace DB template class DataTypeNumber final : public DataTypeNumberBase { +public: bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); } bool canBeUsedAsVersion() const override { return true; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 85644b6f6ca..9f3458b1ece 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -506,7 +506,7 @@ inline bool isNotCreatable(const T & data_type) inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type) { WhichDataType which(data_type); - return which.isInt() || which.isUInt(); + return which.isInt() || which.isUInt() || which.isFloat(); } inline bool isCompilableType(const DataTypePtr & data_type) diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp b/src/DataTypes/Serializations/SerializationNumber.cpp index b5a178bd3a4..c5e2b31e043 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -95,7 +95,7 @@ void SerializationNumber::deserializeTextJSON(IColumn & column, ReadBuffer & } template -void SerializationNumber::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationNumber::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & /*settings*/) const { FieldType x; readCSV(x, istr); diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 7da25298cf2..102421e8721 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -504,17 +504,10 @@ Pipe CacheDictionary::read(const Names & column_names, size } std::shared_ptr dictionary = shared_from_this(); - auto coordinator = std::make_shared(dictionary, column_names, std::move(key_columns), max_block_size); + auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); + auto result = coordinator->read(num_streams); - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared(coordinator); - pipes.emplace_back(Pipe(std::move(source))); - } - - return Pipe::unitePipes(std::move(pipes)); + return result; } template diff --git a/src/Dictionaries/DictionarySource.cpp b/src/Dictionaries/DictionarySource.cpp index fa25dab6115..d3058db87f4 100644 --- a/src/Dictionaries/DictionarySource.cpp +++ b/src/Dictionaries/DictionarySource.cpp @@ -1,6 +1,7 @@ #include "DictionarySource.h" #include + namespace DB { @@ -10,12 +11,95 @@ namespace ErrorCodes extern const int NO_SUCH_COLUMN_IN_TABLE; } +class DictionarySource : public SourceWithProgress +{ +public: + + explicit DictionarySource(std::shared_ptr coordinator_) + : SourceWithProgress(coordinator_->getHeader()), coordinator(std::move(coordinator_)) + { + } + +private: + String getName() const override { return "DictionarySource"; } + + Chunk generate() override + { + ColumnsWithTypeAndName key_columns_to_read; + ColumnsWithTypeAndName data_columns; + + if (!coordinator->getKeyColumnsNextRangeToRead(key_columns_to_read, data_columns)) + 
return {}; + + const auto & header = coordinator->getHeader(); + + std::vector key_columns; + std::vector key_types; + + key_columns.reserve(key_columns_to_read.size()); + key_types.reserve(key_columns_to_read.size()); + + std::unordered_map name_to_column; + + for (const auto & key_column_to_read : key_columns_to_read) + { + key_columns.emplace_back(key_column_to_read.column); + key_types.emplace_back(key_column_to_read.type); + + if (header.has(key_column_to_read.name)) + name_to_column.emplace(key_column_to_read.name, key_column_to_read.column); + } + + for (const auto & data_column : data_columns) + { + if (header.has(data_column.name)) + name_to_column.emplace(data_column.name, data_column.column); + } + + const auto & attributes_names_to_read = coordinator->getAttributesNamesToRead(); + const auto & attributes_types_to_read = coordinator->getAttributesTypesToRead(); + const auto & attributes_default_values_columns = coordinator->getAttributesDefaultValuesColumns(); + + const auto & dictionary = coordinator->getDictionary(); + auto attributes_columns = dictionary->getColumns( + attributes_names_to_read, + attributes_types_to_read, + key_columns, + key_types, + attributes_default_values_columns); + + for (size_t i = 0; i < attributes_names_to_read.size(); ++i) + { + const auto & attribute_name = attributes_names_to_read[i]; + name_to_column.emplace(attribute_name, attributes_columns[i]); + } + + std::vector result_columns; + result_columns.reserve(header.columns()); + + for (const auto & column_with_type : header) + { + const auto & header_name = column_with_type.name; + auto it = name_to_column.find(header_name); + if (it == name_to_column.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column name {} not found in result columns", header_name); + + result_columns.emplace_back(it->second); + } + + size_t rows_size = result_columns[0]->size(); + return Chunk(result_columns, rows_size); + } + + std::shared_ptr coordinator; +}; + bool DictionarySourceCoordinator::getKeyColumnsNextRangeToRead(ColumnsWithTypeAndName & key_columns, ColumnsWithTypeAndName & data_columns) { size_t read_block_index = parallel_read_block_index++; - size_t start = read_block_index * max_block_size; - size_t end = (read_block_index + 1) * max_block_size; + size_t start = max_block_size * read_block_index; + size_t end = max_block_size * (read_block_index + 1); size_t keys_size = key_columns_with_type[0].column->size(); @@ -112,73 +196,20 @@ DictionarySourceCoordinator::cutColumns(const ColumnsWithTypeAndName & columns_w return result; } - -Chunk DictionarySource::generate() +Pipe DictionarySourceCoordinator::read(size_t num_streams) { - ColumnsWithTypeAndName key_columns_to_read; - ColumnsWithTypeAndName data_columns; + Pipes pipes; + pipes.reserve(num_streams); - if (!coordinator->getKeyColumnsNextRangeToRead(key_columns_to_read, data_columns)) - return {}; + auto coordinator = shared_from_this(); - const auto & header = coordinator->getHeader(); - - std::vector key_columns; - std::vector key_types; - - key_columns.reserve(key_columns_to_read.size()); - key_types.reserve(key_columns_to_read.size()); - - std::unordered_map name_to_column; - - for (const auto & key_column_to_read : key_columns_to_read) + for (size_t i = 0; i < num_streams; ++i) { - key_columns.emplace_back(key_column_to_read.column); - key_types.emplace_back(key_column_to_read.type); - - if (header.has(key_column_to_read.name)) - name_to_column.emplace(key_column_to_read.name, key_column_to_read.column); + auto source = 
std::make_shared(coordinator); + pipes.emplace_back(Pipe(std::move(source))); } - for (const auto & data_column : data_columns) - { - if (header.has(data_column.name)) - name_to_column.emplace(data_column.name, data_column.column); - } - - const auto & attributes_names_to_read = coordinator->getAttributesNamesToRead(); - const auto & attributes_types_to_read = coordinator->getAttributesTypesToRead(); - const auto & attributes_default_values_columns = coordinator->getAttributesDefaultValuesColumns(); - - const auto & dictionary = coordinator->getDictionary(); - auto attributes_columns = dictionary->getColumns( - attributes_names_to_read, - attributes_types_to_read, - key_columns, - key_types, - attributes_default_values_columns); - - for (size_t i = 0; i < attributes_names_to_read.size(); ++i) - { - const auto & attribute_name = attributes_names_to_read[i]; - name_to_column.emplace(attribute_name, attributes_columns[i]); - } - - std::vector result_columns; - result_columns.reserve(header.columns()); - - for (const auto & column_with_type : header) - { - const auto & header_name = column_with_type.name; - auto it = name_to_column.find(header_name); - if (it == name_to_column.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column name {} not found in result columns", header_name); - - result_columns.emplace_back(it->second); - } - - size_t rows_size = result_columns[0]->size(); - return Chunk(result_columns, rows_size); + return Pipe::unitePipes(std::move(pipes)); } } diff --git a/src/Dictionaries/DictionarySource.h b/src/Dictionaries/DictionarySource.h index d1851b73a21..0237e1338df 100644 --- a/src/Dictionaries/DictionarySource.h +++ b/src/Dictionaries/DictionarySource.h @@ -1,12 +1,8 @@ #pragma once #include -#include -#include -#include #include #include -#include #include #include #include @@ -15,10 +11,18 @@ namespace DB { -class DictionarySourceCoordinator +class DictionarySource; + +class DictionarySourceCoordinator final : public shared_ptr_helper, public std::enable_shared_from_this { + friend struct shared_ptr_helper; + public: + Pipe read(size_t num_streams); + +private: + explicit DictionarySourceCoordinator( std::shared_ptr dictionary_, const Names & column_names, @@ -45,6 +49,8 @@ public: initialize(column_names); } + friend class DictionarySource; + bool getKeyColumnsNextRangeToRead(ColumnsWithTypeAndName & key_columns, ColumnsWithTypeAndName & data_columns); const Block & getHeader() const { return header; } @@ -57,7 +63,6 @@ public: const std::shared_ptr & getDictionary() const { return dictionary; } -private: void initialize(const Names & column_names); static ColumnsWithTypeAndName cutColumns(const ColumnsWithTypeAndName & columns_with_type, size_t start, size_t length); @@ -77,21 +82,4 @@ private: std::atomic parallel_read_block_index = 0; }; -class DictionarySource : public SourceWithProgress -{ -public: - - explicit DictionarySource(std::shared_ptr coordinator_) - : SourceWithProgress(coordinator_->getHeader()), coordinator(std::move(coordinator_)) - { - } - -private: - String getName() const override { return "DictionarySource"; } - - Chunk generate() override; - - std::shared_ptr coordinator; -}; - } diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 6955b3ddfdc..aca566c9258 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -123,25 +124,35 @@ 
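The refactoring above moves DictionarySource into the .cpp file and has every dictionary build its pipe through DictionarySourceCoordinator::create(...)->read(num_streams). The coordinator hands out consecutive half-open key ranges to the streams via an atomic block counter (getKeyColumnsNextRangeToRead). A minimal standalone sketch of that partitioning pattern, with illustrative names rather than the ClickHouse API (the DictionaryStructure.cpp hunk resumes right after this sketch):

```cpp
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <optional>
#include <thread>
#include <utility>

/// Hands out consecutive [start, end) key ranges to any number of concurrent
/// readers, the way DictionarySourceCoordinator::getKeyColumnsNextRangeToRead
/// uses parallel_read_block_index.
class RangeCoordinator
{
public:
    RangeCoordinator(size_t keys_size_, size_t max_block_size_)
        : keys_size(keys_size_), max_block_size(max_block_size_) {}

    std::optional<std::pair<size_t, size_t>> nextRange()
    {
        size_t read_block_index = parallel_read_block_index++;
        size_t start = max_block_size * read_block_index;
        if (start >= keys_size)
            return std::nullopt; /// no more blocks, this stream finishes
        size_t end = std::min(max_block_size * (read_block_index + 1), keys_size);
        return std::make_pair(start, end);
    }

private:
    size_t keys_size;
    size_t max_block_size;
    std::atomic<size_t> parallel_read_block_index{0};
};

int main()
{
    RangeCoordinator coordinator(10, 3);
    auto reader = [&](int id)
    {
        while (auto range = coordinator.nextRange())
            std::printf("stream %d reads keys [%zu, %zu)\n", id, range->first, range->second);
    };
    std::thread t1(reader, 1);
    std::thread t2(reader, 2);
    t1.join();
    t2.join();
    return 0;
}
```

Because the counter is atomic, any number of sources can pull ranges concurrently without extra locking, and the streams drain the key space without overlap.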
DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration access_to_key_from_attributes = true; } +DataTypes DictionaryStructure::getKeyTypes() const +{ + if (id) + return {std::make_shared()}; + + const auto & key_attributes = *key; + size_t key_attributes_size = key_attributes.size(); + + DataTypes result; + result.reserve(key_attributes_size); + + for (size_t i = 0; i < key_attributes_size; ++i) + result.emplace_back(key_attributes[i].type); + + return result; +} void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const { - size_t key_types_size = key_types.size(); - if (key_types_size != getKeysSize()) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Key structure does not match, expected {}", getKeyDescription()); + auto key_attributes_types = getKeyTypes(); + size_t key_attributes_types_size = key_attributes_types.size(); - if (id && !isUInt64(key_types[0])) - { - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Key type for simple key does not match, expected {}, found {}", - std::to_string(0), - "UInt64", - key_types[0]->getName()); - } + size_t key_types_size = key_types.size(); + if (key_types_size != key_attributes_types_size) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Key structure does not match, expected {}", getKeyDescription()); for (size_t i = 0; i < key_types_size; ++i) { - const auto & expected_type = (*key)[i].type; + const auto & expected_type = key_attributes_types[i]; const auto & actual_type = key_types[i]; if (!areTypesEqual(expected_type, actual_type)) diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 817bc8d7824..3b5164f7f48 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -120,6 +120,7 @@ struct DictionaryStructure final DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); + DataTypes getKeyTypes() const; void validateKeyTypes(const DataTypes & key_types) const; const DictionaryAttribute & getAttribute(const std::string & attribute_name) const; diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index c260924a82b..d9c6b04b593 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -550,17 +550,10 @@ Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size, siz ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared(), dict_struct.id->name)}; std::shared_ptr dictionary = shared_from_this(); - auto coordinator = std::make_shared(dictionary, column_names, std::move(key_columns), max_block_size); + auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); + auto result = coordinator->read(num_streams); - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared(coordinator); - pipes.emplace_back(Pipe(std::move(source))); - } - - return Pipe::unitePipes(std::move(pipes)); + return result; } void registerDictionaryFlat(DictionaryFactory & factory) diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 062620fb25b..8d16f453328 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -758,17 +758,10 @@ Pipe HashedArrayDictionary::read(const Names & column_names key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, 
keys.size()); std::shared_ptr dictionary = shared_from_this(); - auto coordinator = std::make_shared(dictionary, column_names, std::move(key_columns), max_block_size); + auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); + auto result = coordinator->read(num_streams); - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared(coordinator); - pipes.emplace_back(Pipe(std::move(source))); - } - - return Pipe::unitePipes(std::move(pipes)); + return result; } template class HashedArrayDictionary; diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index b4f3aece174..bcc93a34d1e 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -666,17 +666,10 @@ Pipe HashedDictionary::read(const Names & column_na key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size()); std::shared_ptr dictionary = shared_from_this(); - auto coordinator = std::make_shared(dictionary, column_names, std::move(key_columns), max_block_size); + auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); + auto result = coordinator->read(num_streams); - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared(coordinator); - pipes.emplace_back(Pipe(std::move(source))); - } - - return Pipe::unitePipes(std::move(pipes)); + return result; } template diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index b1923306003..022fecab03f 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int TYPE_MISMATCH; } class IDictionary; @@ -107,12 +109,46 @@ public: virtual bool isInjective(const std::string & attribute_name) const = 0; /** Subclass must provide key type that is supported by dictionary. - * Client will use that key type to provide valid key columns for `getColumn` and `has` functions. + * Client will use that key type to provide valid key columns for `getColumn` and `hasKeys` functions. */ virtual DictionaryKeyType getKeyType() const = 0; virtual DictionarySpecialKeyType getSpecialKeyType() const { return DictionarySpecialKeyType::None; } + /** Convert key columns for getColumn, hasKeys functions to match dictionary key column types. + * Default implementation convert key columns into DictionaryStructure key types. + * Subclass can override this method if keys for getColumn, hasKeys functions + * are different from DictionaryStructure keys. Or to prevent implicit conversion of key columns. 
+ */ + virtual void convertKeyColumns(Columns & key_columns, DataTypes & key_types) const + { + const auto & dictionary_structure = getStructure(); + auto key_attributes_types = dictionary_structure.getKeyTypes(); + size_t key_attributes_types_size = key_attributes_types.size(); + size_t key_types_size = key_types.size(); + + if (key_types_size != key_attributes_types_size) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} key lookup structure does not match, expected {}", + getFullName(), + dictionary_structure.getKeyDescription()); + + for (size_t key_attribute_type_index = 0; key_attribute_type_index < key_attributes_types_size; ++key_attribute_type_index) + { + const auto & key_attribute_type = key_attributes_types[key_attribute_type_index]; + auto & key_type = key_types[key_attribute_type_index]; + + if (key_attribute_type->equals(*key_type)) + continue; + + auto & key_column_to_cast = key_columns[key_attribute_type_index]; + ColumnWithTypeAndName column_to_cast = {key_column_to_cast, key_type, ""}; + auto casted_column = castColumnAccurate(std::move(column_to_cast), key_attribute_type); + key_column_to_cast = std::move(casted_column); + key_type = key_attribute_type; + } + } + /** Subclass must validate key columns and keys types * and return column representation of dictionary attribute. * diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 266f8a7415c..c1f244e3b81 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -209,6 +209,11 @@ IPAddressDictionary::IPAddressDictionary( calculateBytesAllocated(); } +void IPAddressDictionary::convertKeyColumns(Columns &, DataTypes &) const +{ + /// Do not perform any implicit keys conversion for IPAddressDictionary +} + ColumnPtr IPAddressDictionary::getColumn( const std::string & attribute_name, const DataTypePtr & result_type, @@ -866,17 +871,10 @@ Pipe IPAddressDictionary::read(const Names & column_names, size_t max_block_size } std::shared_ptr dictionary = shared_from_this(); - auto coordinator = std::make_shared(dictionary, column_names, std::move(key_columns_with_type), std::move(view_columns), max_block_size); + auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns_with_type), std::move(view_columns), max_block_size); + auto result = coordinator->read(num_streams); - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared(coordinator); - pipes.emplace_back(Pipe(std::move(source))); - } - - return Pipe::unitePipes(std::move(pipes)); + return result; } IPAddressDictionary::RowIdxConstIter IPAddressDictionary::ipNotFound() const diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index 9f604b5aeb8..33a9989a9e5 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -69,6 +69,8 @@ public: DictionaryKeyType getKeyType() const override { return DictionaryKeyType::Complex; } + void convertKeyColumns(Columns & key_columns, DataTypes & key_types) const override; + ColumnPtr getColumn( const std::string& attribute_name, const DataTypePtr & result_type, diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 8c31be329aa..8aa9527e467 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,29 @@ 
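The default convertKeyColumns above replaces the old hard requirement that simple-key dictionaries be queried with UInt64: each lookup column is now cast to the declared key type with castColumnAccurate. A toy sketch of the accurate-cast idea for a single Int64 to UInt64 key column (a hypothetical helper, not the ClickHouse implementation):

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

/// Accurately cast Int64 lookup keys to a dictionary's declared UInt64 key
/// type: values that would not round-trip are rejected instead of wrapping,
/// which is the property castColumnAccurate provides for real columns.
std::vector<uint64_t> castKeysAccurate(const std::vector<int64_t> & keys)
{
    std::vector<uint64_t> result;
    result.reserve(keys.size());
    for (int64_t key : keys)
    {
        if (key < 0)
            throw std::runtime_error("Value " + std::to_string(key) + " cannot be converted to UInt64");
        result.push_back(static_cast<uint64_t>(key));
    }
    return result;
}

int main()
{
    for (uint64_t key : castKeysAccurate({1, 2, 42}))
        std::cout << key << '\n';  /// all three convert losslessly

    try
    {
        castKeysAccurate({-1});  /// a negative key cannot be a UInt64
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
    return 0;
}
```

IPAddressDictionary opts out of this conversion entirely, and IPolygonDictionary (whose hunk resumes below) overrides it to force both coordinate columns to Float64.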
IPolygonDictionary::IPolygonDictionary( calculateBytesAllocated(); } +void IPolygonDictionary::convertKeyColumns(Columns & key_columns, DataTypes & key_types) const +{ + if (key_columns.size() != 2) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} key lookup structure does not match, expected two columns of coordinates with type Float64", + getFullName()); + + auto float_64_type = std::make_shared(); + size_t key_types_size = key_types.size(); + for (size_t key_type_index = 0; key_type_index < key_types_size; ++key_type_index) + { + auto & key_type = key_types[key_type_index]; + if (float_64_type->equals(*key_type)) + continue; + + auto & key_column_to_cast = key_columns[key_type_index]; + ColumnWithTypeAndName column_to_cast = {key_column_to_cast, key_type, ""}; + auto casted_column = castColumnAccurate(std::move(column_to_cast), float_64_type); + key_column_to_cast = std::move(casted_column); + key_type = float_64_type; + } +} + ColumnPtr IPolygonDictionary::getColumn( const std::string & attribute_name, const DataTypePtr & result_type, diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h index 50810e250cb..d4864acdec4 100644 --- a/src/Dictionaries/PolygonDictionary.h +++ b/src/Dictionaries/PolygonDictionary.h @@ -95,6 +95,8 @@ public: DictionaryKeyType getKeyType() const override { return DictionaryKeyType::Complex; } + void convertKeyColumns(Columns & key_columns, DataTypes & key_types) const override; + ColumnPtr getColumn( const std::string& attribute_name, const DataTypePtr & result_type, diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 9dcc38dc4b2..0d862573b65 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -736,17 +736,10 @@ Pipe RangeHashedDictionary::read(const Names & column_names ColumnsWithTypeAndName data_columns = {std::move(range_min_column), std::move(range_max_column)}; std::shared_ptr dictionary = shared_from_this(); - auto coordinator = std::make_shared(dictionary, column_names, std::move(key_columns), std::move(data_columns), max_block_size); + auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), std::move(data_columns), max_block_size); + auto result = coordinator->read(num_streams); - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared(coordinator); - pipes.emplace_back(Pipe(std::move(source))); - } - - return Pipe::unitePipes(std::move(pipes)); + return result; } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 2068de0d01c..e00a473f584 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -14,6 +14,8 @@ #include #include +#include + namespace DB { @@ -23,6 +25,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT; extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT; + extern const int BAD_ARGUMENTS; } const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const @@ -60,6 +63,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; + 
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; + format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; + format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter; @@ -84,6 +90,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; + format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -112,7 +119,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.write_statistics = settings.output_format_write_statistics; format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; + format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; @@ -191,7 +200,8 @@ InputFormatPtr FormatFactory::getInput( ParallelParsingInputFormat::Params params{ - buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing}; + buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing, + context->getApplicationType() == Context::ApplicationType::SERVER}; return std::make_shared(params); } @@ -373,6 +383,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_ if (target) throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR); target = std::move(input_creator); + registerFileExtension(name, name); } void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker) @@ -389,6 +400,42 @@ void FormatFactory::registerOutputFormat(const String & name, OutputCreator outp if (target) throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR); target = std::move(output_creator); + registerFileExtension(name, name); +} + +void 
FormatFactory::registerFileExtension(const String & extension, const String & format_name) +{ + file_extension_formats[boost::to_lower_copy(extension)] = format_name; +} + +String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_found) +{ + CompressionMethod compression_method = chooseCompressionMethod(file_name, ""); + if (CompressionMethod::None != compression_method) + { + auto pos = file_name.find_last_of('.'); + if (pos != String::npos) + file_name = file_name.substr(0, pos); + } + + auto pos = file_name.find_last_of('.'); + if (pos == String::npos) + { + if (throw_if_not_found) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by its extension"); + return ""; + } + + String file_extension = file_name.substr(pos + 1, String::npos); + boost::algorithm::to_lower(file_extension); + auto it = file_extension_formats.find(file_extension); + if (it == file_extension_formats.end()) + { + if (throw_if_not_found) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by its extension"); + return ""; + } + return it->second; } void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index a62b32da0cc..a5eaa43a29f 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -108,6 +109,7 @@ private: }; using FormatsDictionary = std::unordered_map<String, Creators>; + using FileExtensionFormats = std::unordered_map<String, String>; public: static FormatFactory & instance(); @@ -169,6 +171,10 @@ public: void registerInputFormat(const String & name, InputCreator input_creator); void registerOutputFormat(const String & name, OutputCreator output_creator); + /// Register a file extension for a format. + void registerFileExtension(const String & extension, const String & format_name); + String getFormatFromFileName(String file_name, bool throw_if_not_found = false); + /// Register schema readers for a format by its name. 
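getFormatFromFileName above first strips a recognized compression extension (delegating to chooseCompressionMethod) and then resolves the remaining extension through the lowercase extension-to-format map that registerFileExtension fills. A simplified standalone model of that two-step lookup; the extension tables here are stand-ins for the registered formats, not the real registry:

```cpp
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>

static std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
    return s;
}

/// Simplified model of FormatFactory::getFormatFromFileName: strip a known
/// compression suffix such as ".gz" first, then resolve the inner extension
/// through a lowercase extension -> format map.
std::string getFormatFromFileName(std::string file_name)
{
    static const std::unordered_set<std::string> compression_extensions
        = {"gz", "deflate", "br", "xz", "zst", "lz4", "bz2", "snappy"};
    static const std::unordered_map<std::string, std::string> format_extensions
        = {{"csv", "CSV"}, {"tsv", "TSV"}, {"parquet", "Parquet"}, {"orc", "ORC"}, {"native", "Native"}};

    auto pos = file_name.find_last_of('.');
    if (pos != std::string::npos && compression_extensions.count(toLower(file_name.substr(pos + 1))))
        file_name = file_name.substr(0, pos);  /// "data.csv.gz" -> "data.csv"

    pos = file_name.find_last_of('.');
    if (pos == std::string::npos)
        return "";  /// the real method can throw BAD_ARGUMENTS instead

    auto it = format_extensions.find(toLower(file_name.substr(pos + 1)));
    return it == format_extensions.end() ? "" : it->second;
}

int main()
{
    std::cout << getFormatFromFileName("data.csv.gz") << '\n';   /// CSV
    std::cout << getFormatFromFileName("hits.Parquet") << '\n';  /// Parquet
    std::cout << getFormatFromFileName("unknown.bin") << '\n';   /// (empty)
    return 0;
}
```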
void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator); void registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator); @@ -192,6 +198,7 @@ public: private: FormatsDictionary dict; + FileExtensionFormats file_extension_formats; const Creators & getCreators(const String & name) const; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 6298e959c3e..b484d623944 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -1,5 +1,6 @@ #pragma once +#include #include @@ -71,6 +72,7 @@ struct FormatSettings UInt64 row_group_size = 1000000; bool low_cardinality_as_dictionary = false; bool import_nested = false; + bool allow_missing_columns = false; } arrow; struct @@ -99,6 +101,14 @@ struct FormatSettings char tuple_delimiter = ','; } csv; + struct HiveText + { + char fields_delimiter = '\x01'; + char collection_items_delimiter = '\x02'; + char map_keys_delimiter = '\x03'; + Names input_field_names; + } hive_text; + struct Custom { std::string result_before_delimiter; @@ -124,6 +134,7 @@ struct FormatSettings { UInt64 row_group_size = 1000000; bool import_nested = false; + bool allow_missing_columns = false; } parquet; struct Pretty @@ -202,6 +213,7 @@ struct FormatSettings struct { bool import_nested = false; + bool allow_missing_columns = false; int64_t row_batch_size = 100'000; } orc; diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 1349c9e3323..b7b3b51cd7b 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -77,6 +77,10 @@ void registerInputFormatJSONAsString(FormatFactory & factory); void registerInputFormatLineAsString(FormatFactory & factory); void registerInputFormatCapnProto(FormatFactory & factory); +#if USE_HIVE +void registerInputFormatHiveText(FormatFactory & factory); +#endif + /// Non trivial prefix and suffix checkers for disabling parallel parsing. 
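The HiveText settings block above fixes the three delimiter levels that the new Hive Text input format (registered under USE_HIVE just below) uses by default: \x01 between fields, \x02 between collection items, and \x03 between a map key and its value. A minimal sketch of how one such row decomposes; the splitting helper is illustrative, not the actual parser:

```cpp
#include <iostream>
#include <string>
#include <vector>

/// Splits one level of a Hive Text row on a single-character delimiter.
std::vector<std::string> split(const std::string & s, char delimiter)
{
    std::vector<std::string> parts;
    size_t start = 0;
    for (size_t pos; (pos = s.find(delimiter, start)) != std::string::npos; start = pos + 1)
        parts.push_back(s.substr(start, pos - start));
    parts.push_back(s.substr(start));
    return parts;
}

int main()
{
    /// Row layout: id \x01 tags(array) \x01 attributes(map)
    std::string row = "42\x01red\x02green\x01size\x03large\x02weight\x03heavy";

    auto fields = split(row, '\x01');  /// fields_delimiter
    std::cout << "id: " << fields[0] << '\n';

    for (const auto & item : split(fields[1], '\x02'))  /// collection_items_delimiter
        std::cout << "tag: " << item << '\n';

    for (const auto & entry : split(fields[2], '\x02'))
    {
        auto kv = split(entry, '\x03');  /// map_keys_delimiter
        std::cout << "attr: " << kv[0] << " = " << kv[1] << '\n';
    }
    return 0;
}
```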
void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory); void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory); @@ -103,6 +107,8 @@ void registerTSKVSchemaReader(FormatFactory & factory); void registerValuesSchemaReader(FormatFactory & factory); void registerTemplateSchemaReader(FormatFactory & factory); +void registerFileExtensions(FormatFactory & factory); + void registerFormats() { auto & factory = FormatFactory::instance(); @@ -169,6 +175,9 @@ void registerFormats() registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); registerInputFormatLineAsString(factory); +#if USE_HIVE + registerInputFormatHiveText(factory); +#endif registerInputFormatCapnProto(factory); diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 945090781dc..a0c7fc643d2 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -11,17 +11,17 @@ #include #include -#include -#include -#include +#include #include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -1175,8 +1175,11 @@ public: const bool left_is_num = col_left_untyped->isNumeric(); const bool right_is_num = col_right_untyped->isNumeric(); - const bool left_is_string = isStringOrFixedString(which_left); - const bool right_is_string = isStringOrFixedString(which_right); + const bool left_is_string = which_left.isStringOrFixedString(); + const bool right_is_string = which_right.isStringOrFixedString(); + + const bool left_is_float = which_left.isFloat(); + const bool right_is_float = which_right.isFloat(); bool date_and_datetime = (which_left.idx != which_right.idx) && (which_left.isDate() || which_left.isDate32() || which_left.isDateTime() || which_left.isDateTime64()) && (which_right.isDate() || which_right.isDate32() || which_right.isDateTime() || which_right.isDateTime64()); @@ -1232,11 +1235,23 @@ public: } else { - // compare + /// Check whether the other data type is comparable to Decimal; this includes Int and Float. if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime) throw Exception( "No operation " + getName() + " between " + left_type->getName() + " and " + right_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + /// When comparing Decimal to Float32/Float64, we convert both of them to Float64. + /// Other systems, such as MySQL and Spark, do the same. 
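The comparison hunk continues below: when one side is a Decimal and the other a Float32/Float64, both operands are cast to Float64 before comparing, matching MySQL and Spark semantics. The effect on a toy fixed-point value (illustrative types, not ClickHouse's Decimal):

```cpp
#include <cstdint>
#include <iostream>

/// Toy fixed-point decimal: value = mantissa / 10^scale.
struct Decimal
{
    int64_t mantissa;
    uint32_t scale;
};

/// Mirrors the strategy in the hunk: convert both operands to Float64, then
/// compare. Exact for moderate mantissas; very large mantissas can lose
/// precision in the double conversion, the trade-off those systems accept.
bool decimalLessThanFloat(Decimal lhs, double rhs)
{
    double scaled = static_cast<double>(lhs.mantissa);
    for (uint32_t i = 0; i < lhs.scale; ++i)
        scaled /= 10.0;  /// 12345 with scale 2 becomes 123.45
    return scaled < rhs;
}

int main()
{
    Decimal d{12345, 2};  /// represents 123.45
    std::cout << std::boolalpha
              << decimalLessThanFloat(d, 123.5) << '\n'   /// true
              << decimalLessThanFloat(d, 123.4) << '\n';  /// false
    return 0;
}
```

Note that the hunk simply re-enters executeImpl with two Float64 columns, so the regular numeric comparison path handles the rest.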
+ if (left_is_float || right_is_float) + { + const auto converted_type = std::make_shared(); + ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, converted_type); + ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, converted_type); + + auto new_arguments + = ColumnsWithTypeAndName{{c0_converted, converted_type, "left"}, {c1_converted, converted_type, "right"}}; + return executeImpl(new_arguments, result_type, input_rows_count); + } return executeDecimal(col_with_type_and_name_left, col_with_type_and_name_right); } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 71597f2b433..fb0dbdfff5c 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -145,6 +145,7 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } bool isDeterministic() const override { return false; } @@ -183,7 +184,17 @@ public: if (input_rows_count == 0) return result_type->createColumn(); - auto dictionary = helper.getDictionary(arguments[0].column); + String dictionary_name; + + if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) + dictionary_name = name_col->getValue(); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected a const string.", + arguments[0].type->getName(), + getName()); + + auto dictionary = helper.getDictionary(dictionary_name); auto dictionary_key_type = dictionary->getKeyType(); auto dictionary_special_key_type = dictionary->getSpecialKeyType(); @@ -216,15 +227,8 @@ public: if (dictionary_key_type == DictionaryKeyType::Simple) { - if (!WhichDataType(key_column_type).isUInt64()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument of function {} must be UInt64 when dictionary is simple. Actual type {}.", - getName(), - key_column_with_type.type->getName()); - key_columns = {key_column}; - key_types = {std::make_shared()}; + key_types = {key_column_with_type.type}; } else if (dictionary_key_type == DictionaryKeyType::Complex) { @@ -257,6 +261,8 @@ public: } } + dictionary->convertKeyColumns(key_columns, key_types); + if (dictionary_special_key_type == DictionarySpecialKeyType::Range) { key_columns.emplace_back(range_col); @@ -460,15 +466,8 @@ public: if (dictionary_key_type == DictionaryKeyType::Simple) { - if (!WhichDataType(key_col_with_type.type).isUInt64()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function {} must be UInt64 when dictionary is simple. 
Actual type {}.", - getName(), - key_col_with_type.type->getName()); - key_columns = {key_column}; - key_types = {std::make_shared()}; + key_types = {key_col_with_type.type}; } else if (dictionary_key_type == DictionaryKeyType::Complex) { @@ -481,7 +480,7 @@ public: key_columns = assert_cast(*key_column).getColumnsCopy(); key_types = assert_cast(*key_column_type).getElements(); } - else if (!isTuple(key_column_type)) + else { size_t keys_size = dictionary->getStructure().getKeysSize(); @@ -502,6 +501,8 @@ public: } } + dictionary->convertKeyColumns(key_columns, key_types); + if (dictionary_special_key_type == DictionarySpecialKeyType::Range) { key_columns.emplace_back(range_col); diff --git a/src/Functions/h3DegsToRads.cpp b/src/Functions/h3DegsToRads.cpp deleted file mode 100644 index b3afc28f5a2..00000000000 --- a/src/Functions/h3DegsToRads.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "config_functions.h" - -#if USE_H3 - -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ -namespace ErrorCodes -{ -extern const int ILLEGAL_TYPE_OF_ARGUMENT; -extern const int ILLEGAL_COLUMN; -} - -namespace -{ - -class FunctionH3DegsToRads final : public IFunction -{ -public: - static constexpr auto name = "h3DegsToRads"; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - std::string getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - - bool useDefaultImplementationForConstants() const override { return true; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const auto * arg = arguments[0].get(); - if (!WhichDataType(arg).isFloat64()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument {} of function {}. Must be Float64", - arg->getName(), 1, getName()); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const auto * column = checkAndGetColumn(arguments[0].column.get()); - - if (!column) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Illegal type {} of argument {} of function {}. 
Must be Float64", - arguments[0].type->getName(), - 1, - getName()); - - const auto & data = column->getData(); - - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(input_rows_count); - - for (size_t row = 0; row < input_rows_count; ++row) - { - const Float64 degrees = data[row]; - auto res = degsToRads(degrees); - dst_data[row] = res; - } - - return dst; - } -}; - -} - -void registerFunctionH3DegsToRads(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/Functions/h3RadsToDegs.cpp b/src/Functions/h3RadsToDegs.cpp deleted file mode 100644 index 99b8969e13f..00000000000 --- a/src/Functions/h3RadsToDegs.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include "config_functions.h" - -#if USE_H3 - -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ -namespace ErrorCodes -{ -extern const int ILLEGAL_TYPE_OF_ARGUMENT; -extern const int ILLEGAL_COLUMN; -} - -namespace -{ - -class FunctionH3RadsToDegs final : public IFunction -{ -public: - static constexpr auto name = "h3RadsToDegs"; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - std::string getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - - bool useDefaultImplementationForConstants() const override { return true; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const auto * arg = arguments[0].get(); - if (!WhichDataType(arg).isFloat64()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument {} of function {}. Must be Float64", - arg->getName(), 1, getName()); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const auto * column = checkAndGetColumn(arguments[0].column.get()); - if (!column) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Illegal type {} of argument {} of function {}. 
Must be Float64", - arguments[0].type->getName(), - 1, - getName()); - - const auto & col_rads = column->getData(); - - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(input_rows_count); - - for (size_t row = 0; row < input_rows_count; ++row) - { - const Float64 rads = col_rads[row]; - auto res = radsToDegs(rads); - dst_data[row] = res; - } - return dst; - } -}; - -} - -void registerFunctionH3RadsToDegs(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index d0bb47ea3d7..e3e1a44663f 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -43,8 +43,6 @@ void registerFunctionH3HexAreaM2(FunctionFactory &); void registerFunctionH3IsResClassIII(FunctionFactory &); void registerFunctionH3IsPentagon(FunctionFactory &); void registerFunctionH3GetFaces(FunctionFactory &); -void registerFunctionH3DegsToRads(FunctionFactory &); -void registerFunctionH3RadsToDegs(FunctionFactory &); void registerFunctionH3HexAreaKm2(FunctionFactory &); void registerFunctionH3CellAreaM2(FunctionFactory &); void registerFunctionH3CellAreaRads2(FunctionFactory &); @@ -105,8 +103,6 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3IsResClassIII(factory); registerFunctionH3IsPentagon(factory); registerFunctionH3GetFaces(factory); - registerFunctionH3DegsToRads(factory); - registerFunctionH3RadsToDegs(factory); registerFunctionH3HexAreaKm2(factory); registerFunctionH3CellAreaM2(factory); registerFunctionH3CellAreaRads2(factory); diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index c61d091cd09..eaab7560e6a 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -46,6 +47,8 @@ std::string toContentEncodingName(CompressionMethod method) return "lz4"; case CompressionMethod::Bzip2: return "bz2"; + case CompressionMethod::Snappy: + return "snappy"; case CompressionMethod::None: return ""; } @@ -79,11 +82,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s return CompressionMethod::Lz4; if (method_str == "bz2") return CompressionMethod::Bzip2; + if (method_str == "snappy") + return CompressionMethod::Snappy; if (hint.empty() || hint == "auto" || hint == "none") return CompressionMethod::None; throw Exception( - "Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'lz4', 'bz2' are supported as compression methods", + "Unknown compression method " + hint + ". 
Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'lz4', 'bz2', 'snappy' are supported as compression methods", ErrorCodes::NOT_IMPLEMENTED); } @@ -107,6 +112,11 @@ std::unique_ptr wrapReadBufferWithCompressionMethod( if (method == CompressionMethod::Bzip2) return std::make_unique(std::move(nested), buf_size, existing_memory, alignment); #endif +#if USE_SNAPPY + if (method == CompressionMethod::Snappy) + return std::make_unique(std::move(nested), buf_size, existing_memory, alignment); +#endif + if (method == CompressionMethod::None) return nested; @@ -136,6 +146,10 @@ std::unique_ptr wrapWriteBufferWithCompressionMethod( #if USE_BZIP2 if (method == CompressionMethod::Bzip2) return std::make_unique(std::move(nested), level, buf_size, existing_memory, alignment); +#endif +#if USE_SNAPPY + if (method == CompressionMethod::Snappy) + throw Exception("Unsupported compression method", ErrorCodes::NOT_IMPLEMENTED); #endif if (method == CompressionMethod::None) return nested; diff --git a/src/IO/CompressionMethod.h b/src/IO/CompressionMethod.h index df6400c55a3..3953ba9d212 100644 --- a/src/IO/CompressionMethod.h +++ b/src/IO/CompressionMethod.h @@ -33,7 +33,8 @@ enum class CompressionMethod Zstd, Brotli, Lz4, - Bzip2 + Bzip2, + Snappy, }; /// How the compression method is named in HTTP. diff --git a/src/IO/HadoopSnappyReadBuffer.cpp b/src/IO/HadoopSnappyReadBuffer.cpp new file mode 100644 index 00000000000..324df67e900 --- /dev/null +++ b/src/IO/HadoopSnappyReadBuffer.cpp @@ -0,0 +1,218 @@ +#include + +#if USE_SNAPPY +#include +#include +#include +#include +#include + +#include + +#include "HadoopSnappyReadBuffer.h" + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SNAPPY_UNCOMPRESS_FAILED; +} + + +inline bool HadoopSnappyDecoder::checkBufferLength(int max) const +{ + return buffer_length >= 0 && buffer_length < max; +} + +inline bool HadoopSnappyDecoder::checkAvailIn(size_t avail_in, int min) +{ + return avail_in >= size_t(min); +} + +inline void HadoopSnappyDecoder::copyToBuffer(size_t * avail_in, const char ** next_in) +{ + assert(*avail_in <= sizeof(buffer)); + + memcpy(buffer, *next_in, *avail_in); + + buffer_length = *avail_in; + *next_in += *avail_in; + *avail_in = 0; +} + + +inline uint32_t HadoopSnappyDecoder::readLength(const char * in) +{ + uint32_t b1 = *(reinterpret_cast(in)); + uint32_t b2 = *(reinterpret_cast(in + 1)); + uint32_t b3 = *(reinterpret_cast(in + 2)); + uint32_t b4 = *(reinterpret_cast(in + 3)); + uint32_t res = ((b1 << 24) + (b2 << 16) + (b3 << 8) + b4); + return res; +} + + +inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readLength(size_t * avail_in, const char ** next_in, int * length) +{ + char tmp[4] = {0}; + + if (!checkBufferLength(4)) + return Status::INVALID_INPUT; + memcpy(tmp, buffer, buffer_length); + + if (!checkAvailIn(*avail_in, 4 - buffer_length)) + { + copyToBuffer(avail_in, next_in); + return Status::NEEDS_MORE_INPUT; + } + memcpy(tmp + buffer_length, *next_in, 4 - buffer_length); + + *avail_in -= 4 - buffer_length; + *next_in += 4 - buffer_length; + buffer_length = 0; + *length = readLength(tmp); + return Status::OK; +} + +inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlockLength(size_t * avail_in, const char ** next_in) +{ + if (block_length < 0) + return readLength(avail_in, next_in, &block_length); + return Status::OK; +} + +inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readCompressedLength(size_t * avail_in, const char ** next_in) +{ + if (compressed_length < 0) + return readLength(avail_in, 
next_in, &compressed_length); + return Status::OK; +} + +inline HadoopSnappyDecoder::Status +HadoopSnappyDecoder::readCompressedData(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out) +{ + if (!checkBufferLength(compressed_length)) + return Status::INVALID_INPUT; + + if (!checkAvailIn(*avail_in, compressed_length - buffer_length)) + { + copyToBuffer(avail_in, next_in); + return Status::NEEDS_MORE_INPUT; + } + + const char * compressed = nullptr; + if (buffer_length > 0) + { + compressed = buffer; + memcpy(buffer + buffer_length, *next_in, compressed_length - buffer_length); + } + else + { + compressed = const_cast(*next_in); + } + + size_t uncompressed_length = *avail_out; + auto status = snappy_uncompress(compressed, compressed_length, *next_out, &uncompressed_length); + if (status != SNAPPY_OK) + { + return Status(status); + } + + *avail_in -= compressed_length - buffer_length; + *next_in += compressed_length - buffer_length; + *avail_out -= uncompressed_length; + *next_out += uncompressed_length; + + total_uncompressed_length += uncompressed_length; + compressed_length = -1; + buffer_length = 0; + return Status::OK; +} + +HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out) +{ + if (*avail_in == 0) + { + if (buffer_length == 0 && block_length < 0 && compressed_length < 0) + return Status::OK; + return Status::NEEDS_MORE_INPUT; + } + + HadoopSnappyDecoder::Status status = readBlockLength(avail_in, next_in); + if (status != Status::OK) + return status; + + while (total_uncompressed_length < block_length) + { + status = readCompressedLength(avail_in, next_in); + if (status != Status::OK) + return status; + + status = readCompressedData(avail_in, next_in, avail_out, next_out); + if (status != Status::OK) + return status; + } + if (total_uncompressed_length != block_length) + return Status::INVALID_INPUT; + return Status::OK; +} + +HadoopSnappyReadBuffer::HadoopSnappyReadBuffer(std::unique_ptr in_, size_t buf_size, char * existing_memory, size_t alignment) + : BufferWithOwnMemory(buf_size, existing_memory, alignment) + , in(std::move(in_)) + , decoder(std::make_unique()) + , in_available(0) + , in_data(nullptr) + , out_capacity(0) + , out_data(nullptr) + , eof(false) +{ +} + +HadoopSnappyReadBuffer::~HadoopSnappyReadBuffer() = default; + +bool HadoopSnappyReadBuffer::nextImpl() +{ + if (eof) + return false; + + if (!in_available) + { + in->nextIfAtEnd(); + in_available = in->buffer().end() - in->position(); + in_data = in->position(); + } + + if (decoder->result == Status::NEEDS_MORE_INPUT && (!in_available || in->eof())) + { + throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED); + } + + out_capacity = internal_buffer.size(); + out_data = internal_buffer.begin(); + decoder->result = decoder->readBlock(&in_available, &in_data, &out_capacity, &out_data); + + in->position() = in->buffer().end() - in_available; + working_buffer.resize(internal_buffer.size() - out_capacity); + + if (decoder->result == Status::OK) + { + decoder->reset(); + if (in->eof()) + { + eof = true; + return !working_buffer.empty(); + } + return true; + } + else if (decoder->result == Status::INVALID_INPUT || decoder->result == Status::BUFFER_TOO_SMALL) + { + throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED); + } + return true; +} + +} + +#endif diff --git 
a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h new file mode 100644 index 00000000000..b1b67399e56 --- /dev/null +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -0,0 +1,115 @@ +#pragma once + +#include + +#if USE_SNAPPY + +#include +#include +#include + +namespace DB +{ + + +/* + * Hadoop-snappy format is one of the compression formats based on Snappy used in Hadoop. It uses its own framing format as follows: + * 1. A compressed file consists of one or more blocks. + * 2. A block consists of an uncompressed length (big endian 4 byte integer) and one or more subblocks. + * 3. A subblock consists of a compressed length (big endian 4 byte integer) and the raw compressed data. + * + * HadoopSnappyDecoder implements the decompression of data compressed with the hadoop-snappy format. + */ +class HadoopSnappyDecoder +{ +public: + enum class Status : int + { + OK = 0, + INVALID_INPUT = 1, + BUFFER_TOO_SMALL = 2, + NEEDS_MORE_INPUT = 3, + }; + + HadoopSnappyDecoder() = default; + ~HadoopSnappyDecoder() = default; + + Status readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out); + + inline void reset() + { + buffer_length = 0; + block_length = -1; + compressed_length = -1; + total_uncompressed_length = 0; + } + + Status result = Status::OK; + +private: + inline bool checkBufferLength(int max) const; + inline static bool checkAvailIn(size_t avail_in, int min); + + inline void copyToBuffer(size_t * avail_in, const char ** next_in); + + inline static uint32_t readLength(const char * in); + inline Status readLength(size_t * avail_in, const char ** next_in, int * length); + inline Status readBlockLength(size_t * avail_in, const char ** next_in); + inline Status readCompressedLength(size_t * avail_in, const char ** next_in); + inline Status readCompressedData(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out); + + char buffer[DBMS_DEFAULT_BUFFER_SIZE] = {0}; + int buffer_length = 0; + + int block_length = -1; + int compressed_length = -1; + int total_uncompressed_length = 0; +}; + +/// HadoopSnappyReadBuffer implements a read buffer for data compressed with the hadoop-snappy format. +class HadoopSnappyReadBuffer : public BufferWithOwnMemory<ReadBuffer> +{ +public: + using Status = HadoopSnappyDecoder::Status; + + inline static String statusToString(Status status) + { + switch (status) + { + case Status::OK: + return "OK"; + case Status::INVALID_INPUT: + return "INVALID_INPUT"; + case Status::BUFFER_TOO_SMALL: + return "BUFFER_TOO_SMALL"; + case Status::NEEDS_MORE_INPUT: + return "NEEDS_MORE_INPUT"; + } + __builtin_unreachable(); + } + + explicit HadoopSnappyReadBuffer( + std::unique_ptr<ReadBuffer> in_, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0); + + ~HadoopSnappyReadBuffer() override; + +private: + bool nextImpl() override; + + std::unique_ptr<ReadBuffer> in; + std::unique_ptr<HadoopSnappyDecoder> decoder; + + size_t in_available; + const char * in_data; + + size_t out_capacity; + char * out_data; + + bool eof; +}; + +} +#endif diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 6d1023947a5..73fe1a28be6 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1068,7 +1068,6 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf) assertChar('"', buf); } - /// CSV, for numbers, dates: quotes are optional, no special escaping rules. 
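To make the framing described in the HadoopSnappyReadBuffer.h comment above concrete, here is a small writer that produces one hadoop-snappy block containing a single subblock; HadoopSnappyReadBuffer should be able to read the result. A minimal sketch that assumes the snappy C++ library is available for linking and ignores I/O error handling:

```cpp
#include <cstdint>
#include <fstream>
#include <string>

#include <snappy.h>  /// requires linking with the snappy library

/// Writes a 4-byte big-endian length, as the hadoop-snappy framing requires.
static void writeBigEndian32(std::ofstream & out, uint32_t value)
{
    char bytes[4] = {
        static_cast<char>(value >> 24), static_cast<char>(value >> 16),
        static_cast<char>(value >> 8), static_cast<char>(value)};
    out.write(bytes, 4);
}

int main()
{
    std::string data(1024, 'x');

    std::string compressed;
    snappy::Compress(data.data(), data.size(), &compressed);

    std::ofstream out("test.snappy", std::ios::binary);
    writeBigEndian32(out, static_cast<uint32_t>(data.size()));        /// block: uncompressed length
    writeBigEndian32(out, static_cast<uint32_t>(compressed.size()));  /// subblock: compressed length
    out.write(compressed.data(), static_cast<std::streamsize>(compressed.size()));
    return 0;
}
```

Hadoop writes larger inputs as several blocks and subblocks; the decoder above accumulates subblocks until total_uncompressed_length reaches the block length.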
template <typename T> inline void readCSVSimple(T & x, ReadBuffer & buf) @@ -1088,8 +1087,10 @@ inline void readCSVSimple(T & x, ReadBuffer & buf) } template <typename T> -inline std::enable_if_t<is_arithmetic_v<T>, void> -readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline std::enable_if_t<is_arithmetic_v<T>, void> readCSV(T & x, ReadBuffer & buf) +{ + readCSVSimple(x, buf); +} inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); } inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } diff --git a/src/IO/SnappyReadBuffer.cpp b/src/IO/SnappyReadBuffer.cpp new file mode 100644 index 00000000000..c75aee9dc3a --- /dev/null +++ b/src/IO/SnappyReadBuffer.cpp @@ -0,0 +1,76 @@ +#include + +#if USE_SNAPPY +#include +#include +#include + +#include + +#include +#include +#include + +#include "SnappyReadBuffer.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SNAPPY_UNCOMPRESS_FAILED; + extern const int SEEK_POSITION_OUT_OF_BOUND; +} + + +SnappyReadBuffer::SnappyReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment) + : BufferWithOwnMemory<SeekableReadBuffer>(buf_size, existing_memory, alignment), in(std::move(in_)) +{ +} + +bool SnappyReadBuffer::nextImpl() +{ + if (compress_buffer.empty() && uncompress_buffer.empty()) + { + WriteBufferFromString wb(compress_buffer); + copyData(*in, wb); + + bool success = snappy::Uncompress(compress_buffer.data(), wb.count(), &uncompress_buffer); + if (!success) + { + throw Exception("snappy uncompress failed", ErrorCodes::SNAPPY_UNCOMPRESS_FAILED); + } + BufferBase::set(const_cast<char *>(uncompress_buffer.data()), uncompress_buffer.size(), 0); + return true; + } + return false; +} + +SnappyReadBuffer::~SnappyReadBuffer() = default; + +off_t SnappyReadBuffer::seek(off_t off, int whence) +{ + off_t new_pos; + if (whence == SEEK_SET) + new_pos = off; + else if (whence == SEEK_CUR) + new_pos = count() + off; + else + throw Exception("Only SEEK_SET and SEEK_CUR seek modes allowed.", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + working_buffer = internal_buffer; + if (new_pos < 0 || new_pos > off_t(working_buffer.size())) + throw Exception( + String("Cannot seek through buffer") + " because seek position (" + toString(new_pos) + ") is out of bounds [0, " + + toString(working_buffer.size()) + "]", + ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + position() = working_buffer.begin() + new_pos; + return new_pos; +} + +off_t SnappyReadBuffer::getPosition() +{ + return count(); +} + +} +#endif diff --git a/src/IO/SnappyReadBuffer.h b/src/IO/SnappyReadBuffer.h new file mode 100644 index 00000000000..e440f2d3003 --- /dev/null +++ b/src/IO/SnappyReadBuffer.h @@ -0,0 +1,35 @@ +#pragma once + +#include + +#if USE_SNAPPY + +#include +#include +#include + +namespace DB +{ +class SnappyReadBuffer : public BufferWithOwnMemory<SeekableReadBuffer> +{ +public: + explicit SnappyReadBuffer( + std::unique_ptr<ReadBuffer> in_, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0); + + ~SnappyReadBuffer() override; + + bool nextImpl() override; + off_t seek(off_t off, int whence) override; + off_t getPosition() override; + +private: + std::unique_ptr<ReadBuffer> in; + String compress_buffer; + String uncompress_buffer; +}; + +} +#endif diff --git a/src/IO/SnappyWriteBuffer.cpp b/src/IO/SnappyWriteBuffer.cpp new file mode 100644 index 00000000000..9cbdaab330f --- /dev/null +++ b/src/IO/SnappyWriteBuffer.cpp @@ -0,0 +1,92 @@ +#include + +#if USE_SNAPPY +#include + +#include + +#include +#include "SnappyWriteBuffer.h" + 
+namespace DB +{ +namespace ErrorCodes +{ + extern const int SNAPPY_COMPRESS_FAILED; +} + +SnappyWriteBuffer::SnappyWriteBuffer(std::unique_ptr<WriteBuffer> out_, size_t buf_size, char * existing_memory, size_t alignment) + : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment), out(std::move(out_)) +{ +} + +SnappyWriteBuffer::~SnappyWriteBuffer() +{ + finish(); +} + +void SnappyWriteBuffer::nextImpl() +{ + if (!offset()) + { + return; + } + + const char * in_data = reinterpret_cast<const char *>(working_buffer.begin()); + size_t in_available = offset(); + uncompress_buffer.append(in_data, in_available); +} + +void SnappyWriteBuffer::finish() +{ + if (finished) + return; + + try + { + finishImpl(); + out->finalize(); + finished = true; + } + catch (...) + { + /// Do not try to flush next time after exception. + out->position() = out->buffer().begin(); + finished = true; + throw; + } +} + +void SnappyWriteBuffer::finishImpl() +{ + next(); + + bool success = snappy::Compress(uncompress_buffer.data(), uncompress_buffer.size(), &compress_buffer); + if (!success) + { + throw Exception("snappy compress failed", ErrorCodes::SNAPPY_COMPRESS_FAILED); + } + + char * in_data = compress_buffer.data(); + size_t in_available = compress_buffer.size(); + char * out_data = nullptr; + size_t out_capacity = 0; + size_t len = 0; + while (in_available > 0) + { + out->nextIfAtEnd(); + out_data = out->position(); + out_capacity = out->buffer().end() - out->position(); + len = in_available > out_capacity ? out_capacity : in_available; + + memcpy(out_data, in_data, len); + in_data += len; + in_available -= len; + out->position() += len; + } +} + +} + +#endif + diff --git a/src/IO/SnappyWriteBuffer.h b/src/IO/SnappyWriteBuffer.h new file mode 100644 index 00000000000..90fb8521c25 --- /dev/null +++ b/src/IO/SnappyWriteBuffer.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +#if USE_SNAPPY +#include +#include + +namespace DB +{ +/// Performs compression using the snappy library and writes compressed data to the underlying buffer. 
+class SnappyWriteBuffer : public BufferWithOwnMemory +{ +public: + explicit SnappyWriteBuffer( + std::unique_ptr out_, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0); + + ~SnappyWriteBuffer() override; + + void finalizeImpl() override { finish(); } + +private: + void nextImpl() override; + + void finishImpl(); + void finish(); + + std::unique_ptr out; + bool finished = false; + + String uncompress_buffer; + String compress_buffer; +}; + +} + +#endif + diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt index d5907bf67ad..0d335d66d27 100644 --- a/src/IO/examples/CMakeLists.txt +++ b/src/IO/examples/CMakeLists.txt @@ -72,3 +72,10 @@ target_link_libraries (dragonbox_test PRIVATE dragonbox_to_chars) add_executable (zstd_buffers zstd_buffers.cpp) target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io) + +add_executable (snappy_read_buffer snappy_read_buffer.cpp) +target_link_libraries (snappy_read_buffer PRIVATE clickhouse_common_io) + +add_executable (hadoop_snappy_read_buffer hadoop_snappy_read_buffer.cpp) +target_link_libraries (hadoop_snappy_read_buffer PRIVATE clickhouse_common_io) + diff --git a/src/IO/examples/hadoop_snappy_read_buffer.cpp b/src/IO/examples/hadoop_snappy_read_buffer.cpp new file mode 100644 index 00000000000..9cb01e6d697 --- /dev/null +++ b/src/IO/examples/hadoop_snappy_read_buffer.cpp @@ -0,0 +1,43 @@ +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +std::string uncompress(size_t buf_size) +{ + using namespace DB; + + String path = "test.snappy"; + std::unique_ptr in1 = std::make_unique(path, buf_size); + HadoopSnappyReadBuffer in2(std::move(in1)); + + String output; + WriteBufferFromString out(output); + copyData(in2, out); + + output.resize(out.count()); + return output; +} + +int main() +{ + auto output = uncompress(1024 * 1024); + for (size_t i = 1; i < 1024; ++i) + { + size_t buf_size = 1024 * i; + if (uncompress(buf_size) != output) + { + std::cout << "test hadoop snappy read buffer failed, buf_size:" << buf_size << std::endl; + return 1; + } + } + std::cout << "test hadoop snappy read buffer success" << std::endl; + return 0; +} diff --git a/src/IO/examples/snappy_read_buffer.cpp b/src/IO/examples/snappy_read_buffer.cpp new file mode 100644 index 00000000000..ccf2f70f1c4 --- /dev/null +++ b/src/IO/examples/snappy_read_buffer.cpp @@ -0,0 +1,35 @@ +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + + +int main() +{ + using namespace DB; + String str = "this is a snappy example.\n"; + String input; + for (size_t i = 0; i < 5; i++) + input += str; + + String input1; + snappy::Compress(input.data(), input.size(), &input1); + + std::unique_ptr in1 = std::make_unique(input1); + SnappyReadBuffer in2(std::move(in1)); + + String output; + WriteBufferFromString out(output); + copyData(in2, out); + + output.resize(out.count()); + assert(input == output); + return 0; +} diff --git a/src/IO/examples/test.snappy b/src/IO/examples/test.snappy new file mode 100644 index 00000000000..1036e7e3a4d Binary files /dev/null and b/src/IO/examples/test.snappy differ diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index d5039a2f19e..2e63e6e1b43 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -33,11 +34,21 @@ public: 
explicit AddDefaultDatabaseVisitor( ContextPtr context_, const String & database_name_, - bool only_replace_current_database_function_ = false) + bool only_replace_current_database_function_ = false, + bool only_replace_in_join_ = false) : context(context_) , database_name(database_name_) , only_replace_current_database_function(only_replace_current_database_function_) - {} + , only_replace_in_join(only_replace_in_join_) + { + if (!context->isGlobalContext()) + { + for (const auto & [table_name, _ /* storage */] : context->getExternalTables()) + { + external_tables.insert(table_name); + } + } + } void visitDDL(ASTPtr & ast) const { @@ -81,8 +92,10 @@ private: ContextPtr context; const String database_name; + std::set external_tables; bool only_replace_current_database_function = false; + bool only_replace_in_join = false; void visit(ASTSelectWithUnionQuery & select, ASTPtr &) const { @@ -124,6 +137,9 @@ private: void visit(ASTTablesInSelectQueryElement & tables_element, ASTPtr &) const { + if (only_replace_in_join && !tables_element.table_join) + return; + if (tables_element.table_expression) tryVisit(tables_element.table_expression); } @@ -138,13 +154,17 @@ private: void visit(const ASTTableIdentifier & identifier, ASTPtr & ast) const { - if (!identifier.compound()) - { - auto qualified_identifier = std::make_shared(database_name, identifier.name()); - if (!identifier.alias.empty()) - qualified_identifier->setAlias(identifier.alias); - ast = qualified_identifier; - } + /// Already has database. + if (identifier.compound()) + return; + /// There is temporary table with such name, should not be rewritten. + if (external_tables.count(identifier.shortName())) + return; + + auto qualified_identifier = std::make_shared(database_name, identifier.name()); + if (!identifier.alias.empty()) + qualified_identifier->setAlias(identifier.alias); + ast = qualified_identifier; } void visit(ASTSubquery & subquery, ASTPtr &) const diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7ddb0c8c26e..d0308d11a35 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -725,9 +725,8 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (create.storage || create.is_dictionary || create.isView()) { if (create.temporary && create.storage && create.storage->engine && create.storage->engine->name != "Memory") - throw Exception( - "Temporary tables can only be created with ENGINE = Memory, not " + create.storage->engine->name, - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Temporary tables can only be created with ENGINE = Memory, not {}", create.storage->engine->name); return; } @@ -1254,17 +1253,14 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) { /// If the query is a CREATE SELECT, insert the data into the table. 
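    /// Illustrative example (not part of the patch):
    ///     CREATE TABLE t ENGINE = Memory AS SELECT number FROM system.numbers LIMIT 10
    /// is executed as the CREATE itself plus the internal INSERT INTO t SELECT ... built here.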
if (create.select && !create.attach - && !create.is_ordinary_view && !create.is_live_view && !create.is_window_view && (!create.is_materialized_view || create.is_populate)) + && !create.is_ordinary_view && !create.is_live_view && !create.is_window_view + && (!create.is_materialized_view || create.is_populate)) { auto insert = std::make_shared(); insert->table_id = {create.getDatabase(), create.getTable(), create.uuid}; insert->select = create.select->clone(); - if (create.temporary && !getContext()->getSessionContext()->hasQueryContext()) - getContext()->getSessionContext()->makeQueryContext(); - - return InterpreterInsertQuery(insert, - create.temporary ? getContext()->getSessionContext() : getContext(), + return InterpreterInsertQuery(insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 7049dfc03f6..8e0f73f0b31 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1977,6 +1977,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete)) quota = context->getQuota(); + query_info.settings_limit_offset_done = options.settings_limit_offset_done; storage->read(query_plan, required_columns, metadata_snapshot, query_info, context, processing_stage, max_block_size, max_streams); if (context->hasQueryContext() && !options.is_internal) diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 6779093a53d..e4b3e62c358 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -83,7 +83,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( } } - if (num_children == 1 && settings_limit_offset_needed) + if (num_children == 1 && settings_limit_offset_needed && !options.settings_limit_offset_done) { const ASTPtr first_select_ast = ast->list_of_selects->children.at(0); ASTSelectQuery * select_query = dynamic_cast(first_select_ast.get()); @@ -127,7 +127,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(new_limit_length_ast)); } - settings_limit_offset_done = true; + options.settings_limit_offset_done = true; } } @@ -305,7 +305,7 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan) } } - if (settings_limit_offset_needed && !settings_limit_offset_done) + if (settings_limit_offset_needed && !options.settings_limit_offset_done) { if (settings.limit > 0) { diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 02af07bc00c..efa0e921527 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -13,7 +13,7 @@ #include #include - +#if USE_MYSQL using namespace DB; static inline ASTPtr tryRewrittenCreateQuery(const String & query, ContextPtr context) @@ -255,3 +255,4 @@ TEST(MySQLCreateRewritten, QueryWithEnum) std::string(MATERIALIZEDMYSQL_TABLE_COLUMNS) + ") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)"); } +#endif diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 
bc95a940c18..ee708b064bd 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -48,6 +48,7 @@ struct SelectQueryOptions bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select bool with_all_cols = false; /// asterisk include materialized and aliased columns + bool settings_limit_offset_done = false; /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. They are needed because local @@ -58,8 +59,10 @@ struct SelectQueryOptions SelectQueryOptions( QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0, - bool is_subquery_ = false) - : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_) + bool is_subquery_ = false, + bool settings_limit_offset_done_ = false) + : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_), + settings_limit_offset_done(settings_limit_offset_done_) {} SelectQueryOptions copy() const { return *this; } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 71964130412..f937c73d1a8 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -36,9 +36,9 @@ public: ~Session(); Session(const Session &&) = delete; - Session& operator=(const Session &&) = delete; + Session & operator=(const Session &&) = delete; Session(const Session &) = delete; - Session& operator=(const Session &) = delete; + Session & operator=(const Session &) = delete; /// Provides information about the authentication type of a specified user. AuthenticationType getAuthenticationType(const String & user_name) const; @@ -97,4 +97,3 @@ private: }; } - diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index d6342e3973e..46254d0c3a2 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -168,6 +168,8 @@ public: void shutdown() override { stopFlushThread(); + + auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (table) table->flushAndShutdown(); } @@ -186,7 +188,6 @@ private: /* Saving thread data */ const StorageID table_id; const String storage_def; - StoragePtr table; String create_query; String old_create_query; bool is_prepared = false; @@ -525,7 +526,7 @@ void SystemLog::prepareTable() { String description = table_id.getNameForLogs(); - table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); + auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (table) { diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 2d1b6b3f239..0d7d56058b9 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -363,8 +363,7 @@ void RestoreQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, D if (IdentifierSemantic::getMembership(identifier)) { identifier.restoreTable(); // TODO(ilezhankin): should restore qualified name here - why exactly here? 
- if (data.rename) - data.changeTable(identifier); + data.changeTable(identifier); } } } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 1ed4da57a93..0f35d052ed2 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -67,7 +67,6 @@ struct RestoreQualifiedNamesMatcher { DatabaseAndTableWithAlias distributed_table; DatabaseAndTableWithAlias remote_table; - bool rename = false; void changeTable(ASTIdentifier & identifier) const; }; diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index c733398a32b..7e1d48d7f55 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -79,6 +79,13 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << ")"; } + if (infile) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM INFILE " << (settings.hilite ? hilite_none : "") << infile->as().value.safeGet(); + if (compression) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " COMPRESSION " << (settings.hilite ? hilite_none : "") << compression->as().value.safeGet(); + } + if (select) { settings.ostr << " "; @@ -91,12 +98,6 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s } else { - if (infile) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM INFILE " << (settings.hilite ? hilite_none : "") << infile->as().value.safeGet(); - if (compression) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " COMPRESSION " << (settings.hilite ? hilite_none : "") << compression->as().value.safeGet(); - } if (!format.empty()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? hilite_none : "") << format; diff --git a/src/Parsers/ASTLiteral.h b/src/Parsers/ASTLiteral.h index 856bed61979..f570597074f 100644 --- a/src/Parsers/ASTLiteral.h +++ b/src/Parsers/ASTLiteral.h @@ -15,8 +15,7 @@ namespace DB class ASTLiteral : public ASTWithAlias { public: - explicit ASTLiteral(Field && value_) : value(value_) {} - explicit ASTLiteral(const Field & value_) : value(value_) {} + explicit ASTLiteral(Field value_) : value(std::move(value_)) {} Field value; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 526b3aeb2bd..9c8f8b4e46b 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1639,7 +1639,7 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, if (std::is_same_v && arr.size() == 1) return false; - literal = std::make_shared(arr); + literal = std::make_shared(std::move(arr)); literal->begin = literal_begin; literal->end = ++pos; node = literal; diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index a968e159a5e..568f486a5cf 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -113,10 +113,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - Pos before_values = pos; - String format_str; - - /// VALUES or FROM INFILE or FORMAT or SELECT + /// Check if file is a source of data. 
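    /// Illustrative example (not part of the patch):
    ///     INSERT INTO t FROM INFILE 'data.csv.gz' COMPRESSION 'gzip' FORMAT CSV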
if (s_from_infile.ignore(pos, expected)) { /// Read file name to process it later @@ -131,14 +128,14 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!compression_p.parse(pos, compression, expected)) return false; } - - /// Read format name - if (!s_format.ignore(pos, expected) || !name_p.parse(pos, format, expected)) - return false; - - tryGetIdentifierNameInto(format, format_str); } - else if (s_values.ignore(pos, expected)) + + Pos before_values = pos; + String format_str; + + /// VALUES or FORMAT or SELECT or WITH or WATCH. + /// After FROM INFILE we expect FORMAT, SELECT, WITH or nothing. + if (!infile && s_values.ignore(pos, expected)) { /// If VALUES is defined in query, everything except setting will be parsed as data data = pos->begin; @@ -166,21 +163,17 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) tryGetIdentifierNameInto(format, format_str); } - else if (s_watch.ignore(pos, expected)) + else if (!infile && s_watch.ignore(pos, expected)) { /// If WATCH is defined, return to position before WATCH and parse /// rest of query as WATCH query. pos = before_values; ParserWatchQuery watch_p; watch_p.parse(pos, watch, expected); - - /// FORMAT section is expected if we have input() in SELECT part - if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected)) - return false; } - else + else if (!infile) { - /// If all previous conditions were false, query is incorrect + /// If all previous conditions were false and it's not FROM INFILE, query is incorrect return false; } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 4af2c651c39..558ba9bdd65 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -30,6 +30,7 @@ ArrowBlockInputFormat::ArrowBlockInputFormat(ReadBuffer & in_, const Block & hea Chunk ArrowBlockInputFormat::generate() { Chunk res; + block_missing_values.clear(); arrow::Result> batch_result; if (stream) @@ -71,6 +72,13 @@ Chunk ArrowBlockInputFormat::generate() arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result); + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. 
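    /// (The bits set here are consumed by the downstream defaults-filling step, e.g.
    /// AddingDefaultsTransform, which evaluates DEFAULT expressions only for the
    /// marked (column, row) positions.)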
+ if (format_settings.defaults_for_omitted_fields) + for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) + for (const auto & column_idx : missing_columns) + block_missing_values.setBit(column_idx, row_idx); + return res; } @@ -83,6 +91,12 @@ void ArrowBlockInputFormat::resetParser() else file_reader.reset(); record_batch_current = 0; + block_missing_values.clear(); +} + +const BlockMissingValues & ArrowBlockInputFormat::getMissingValues() const +{ + return block_missing_values; } static std::shared_ptr createStreamReader(ReadBuffer & in) @@ -110,16 +124,23 @@ static std::shared_ptr createFileReader(ReadB void ArrowBlockInputFormat::prepareReader() { + std::shared_ptr schema; if (stream) + { stream_reader = createStreamReader(*in); + schema = stream_reader->schema(); + } else { file_reader = createFileReader(*in, format_settings, is_stopped); if (!file_reader) return; + schema = file_reader->schema(); } - arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "Arrow", format_settings.arrow.import_nested); + arrow_column_to_ch_column = std::make_unique( + getPort().getHeader(), "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns); + missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); if (stream) record_batch_total = -1; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index 62cbf949fc2..ee1e2d6c5a8 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -25,6 +25,8 @@ public: String getName() const override { return "ArrowBlockInputFormat"; } + const BlockMissingValues & getMissingValues() const override; + private: Chunk generate() override; @@ -45,6 +47,9 @@ private: int record_batch_total = 0; int record_batch_current = 0; + std::vector missing_columns; + BlockMissingValues block_missing_values; + const FormatSettings format_settings; void prepareReader(); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 86d278397c2..484a3a17f8f 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -5,7 +5,6 @@ #include "ArrowBufferedStreams.h" #if USE_ARROW || USE_ORC || USE_PARQUET - #include #include #include diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index aa181ea0b8b..c09181ed74f 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -59,12 +59,12 @@ namespace DB namespace ErrorCodes { + extern const int UNKNOWN_TYPE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; extern const int BAD_ARGUMENTS; extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; - extern const int UNKNOWN_TYPE; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; } @@ -80,9 +80,11 @@ static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); + if (chunk->length() == 0) + continue; + /// buffers[0] is a null bitmap and buffers[1] are actual values std::shared_ptr buffer = chunk->data()->buffers[1]; - const auto * raw_data = reinterpret_cast(buffer->data()); column_data.insert_assume_reserved(raw_data, raw_data + 
chunk->length()); } @@ -146,6 +148,9 @@ static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { arrow::BooleanArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); + if (chunk.length() == 0) + continue; + /// buffers[0] is a null bitmap and buffers[1] are actual values std::shared_ptr buffer = chunk.data()->buffers[1]; @@ -525,8 +530,8 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, } ArrowColumnToCHColumn::ArrowColumnToCHColumn( - const Block & header_, const std::string & format_name_, bool import_nested_) - : header(header_), format_name(format_name_), import_nested(import_nested_) + const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_) + : header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_) { } @@ -547,10 +552,8 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.rows()); - std::unordered_map nested_tables; for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { @@ -574,10 +577,18 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & read_from_nested = nested_tables[nested_table_name]->has(header_column.name); } - - // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable? if (!read_from_nested) - throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; + { + if (!allow_missing_columns) + throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; + + ColumnWithTypeAndName column; + column.name = header_column.name; + column.type = header_column.type; + column.column = header_column.column->cloneResized(num_rows); + columns_list.push_back(std::move(column.column)); + continue; + } } std::shared_ptr arrow_column = name_to_column_ptr[header_column.name]; @@ -600,11 +611,39 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } column.type = header_column.type; - num_rows = column.column->size(); columns_list.push_back(std::move(column.column)); } res.setColumns(columns_list, num_rows); } + +std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const +{ + std::vector missing_columns; + auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name); + auto flatten_block_from_arrow = Nested::flatten(block_from_arrow); + for (size_t i = 0, columns = header.columns(); i < columns; ++i) + { + const auto & column = header.getByPosition(i); + bool read_from_nested = false; + String nested_table_name = Nested::extractTableName(column.name); + if (!block_from_arrow.has(column.name)) + { + if (import_nested && block_from_arrow.has(nested_table_name)) + read_from_nested = flatten_block_from_arrow.has(column.name); + + if (!read_from_nested) + { + if (!allow_missing_columns) + throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", column.name}; + + missing_columns.push_back(i); + } + } + } + return missing_columns; } + +} + #endif diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 58f8f1536b5..07e7fb36404 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ 
b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -21,18 +21,27 @@ class ArrowColumnToCHColumn public: using NameToColumnPtr = std::unordered_map>; - ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_); + ArrowColumnToCHColumn( + const Block & header_, + const std::string & format_name_, + bool import_nested_, + bool allow_missing_columns_); void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table); void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr); + /// Get missing columns that exists in header but not in arrow::Schema + std::vector getMissingColumns(const arrow::Schema & schema) const; + static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name); private: const Block & header; const std::string format_name; bool import_nested; + /// If false, throw exception if some columns in header not exists in arrow table. + bool allow_missing_columns; /// Map {column name : dictionary column}. /// To avoid converting dictionary from Arrow Dictionary diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index fd7b2404c77..fb3389475ac 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -391,20 +391,27 @@ AvroRowOutputFormat::AvroRowOutputFormat( AvroRowOutputFormat::~AvroRowOutputFormat() = default; -void AvroRowOutputFormat::writePrefix() +void AvroRowOutputFormat::createFileWriter() { - // we have to recreate avro::DataFileWriterBase object due to its interface limitations file_writer_ptr = std::make_unique( std::make_unique(out), serializer.getSchema(), settings.avro.output_sync_interval, getCodec(settings.avro.output_codec)); +} + +void AvroRowOutputFormat::writePrefix() +{ + // we have to recreate avro::DataFileWriterBase object due to its interface limitations + createFileWriter(); file_writer_ptr->syncIfNeeded(); } void AvroRowOutputFormat::write(const Columns & columns, size_t row_num) { + if (!file_writer_ptr) + createFileWriter(); file_writer_ptr->syncIfNeeded(); serializer.serializeRow(columns, row_num, file_writer_ptr->encoder()); file_writer_ptr->incr(); diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index b5583406cb8..b07edd88ae1 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -58,6 +58,8 @@ private: virtual void writePrefix() override; virtual void writeSuffix() override; + void createFileWriter(); + FormatSettings settings; AvroSerializer serializer; std::unique_ptr file_writer_ptr; diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index b356967a544..bb202a3e177 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -114,6 +114,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) }; registerWithNamesAndTypes("RowBinary", register_func); + factory.registerFileExtension("bin", "RowBinary"); } void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 735a549d0a6..216ec6b295a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ 
-29,14 +29,20 @@ CSVRowInputFormat::CSVRowInputFormat( bool with_names_, bool with_types_, const FormatSettings & format_settings_) - : RowInputFormatWithNamesAndTypes( - header_, - in_, - params_, - with_names_, - with_types_, - format_settings_, - std::make_unique(in_, format_settings_)) + : CSVRowInputFormat( + header_, in_, params_, with_names_, with_types_, format_settings_, std::make_unique(in_, format_settings_)) +{ +} + +CSVRowInputFormat::CSVRowInputFormat( + const Block & header_, + ReadBuffer & in_, + const Params & params_, + bool with_names_, + bool with_types_, + const FormatSettings & format_settings_, + std::unique_ptr format_reader_) + : RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_, std::move(format_reader_)) { const String bad_delimiters = " \t\"'.UL"; if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index d723647595e..ad9f6c4e492 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -26,6 +26,10 @@ public: String getName() const override { return "CSVRowInputFormat"; } +protected: + explicit CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); + private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 311f4742335..67743a04bf3 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -310,6 +310,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) return std::make_shared(buf, sample, std::move(params), FormatSchemaInfo(settings, "CapnProto", true), settings); }); + factory.registerFileExtension("capnp", "CapnProto"); } void registerCapnProtoSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp new file mode 100644 index 00000000000..4cbdbc27764 --- /dev/null +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -0,0 +1,60 @@ +#include + +#if USE_HIVE + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + +static FormatSettings updateFormatSettings(const FormatSettings & settings) +{ + FormatSettings updated = settings; + updated.csv.delimiter = updated.hive_text.fields_delimiter; + return updated; +} + +HiveTextRowInputFormat::HiveTextRowInputFormat( + const Block & header_, ReadBuffer & in_, const Params & params_, const FormatSettings & format_settings_) + : HiveTextRowInputFormat(header_, std::make_unique(in_), params_, updateFormatSettings(format_settings_)) +{ +} + +HiveTextRowInputFormat::HiveTextRowInputFormat( + const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_) + : CSVRowInputFormat( + header_, *buf_, params_, true, false, format_settings_, std::make_unique(std::move(buf_), format_settings_)) +{ +} + +HiveTextFormatReader::HiveTextFormatReader(std::unique_ptr buf_, const FormatSettings & format_settings_) + : CSVFormatReader(*buf_, format_settings_), buf(std::move(buf_)), 
input_field_names(format_settings_.hive_text.input_field_names) +{ +} + +std::vector HiveTextFormatReader::readNames() +{ + PeekableReadBufferCheckpoint checkpoint{*buf, true}; + auto values = readHeaderRow(); + input_field_names.resize(values.size()); + return input_field_names; +} + +std::vector HiveTextFormatReader::readTypes() +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readTypes is not implemented"); +} + +void registerInputFormatHiveText(FormatFactory & factory) +{ + factory.registerInputFormat( + "HiveText", [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams & params, const FormatSettings & settings) + { + return std::make_shared(sample, buf, params, settings); + }); +} +} +#endif diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h new file mode 100644 index 00000000000..3b37078acb3 --- /dev/null +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#if USE_HIVE +#include +#include + + +namespace DB +{ + +/// A stream for input data in Hive Text format. +/// Parallel parsing is disabled currently. +class HiveTextRowInputFormat : public CSVRowInputFormat +{ +public: + HiveTextRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, const FormatSettings & format_settings_); + + String getName() const override { return "HiveTextRowInputFormat"; } + +private: + HiveTextRowInputFormat( + const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_); +}; + +class HiveTextFormatReader : public CSVFormatReader +{ +public: + HiveTextFormatReader(std::unique_ptr buf_, const FormatSettings & format_settings_); + + std::vector readNames() override; + std::vector readTypes() override; + +private: + std::unique_ptr buf; + std::vector input_field_names; +}; + +} + +#endif diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 75beca955b9..82cf44890a9 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -340,6 +340,8 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) return std::make_shared(buf, sample, std::move(params), settings, false); }); + factory.registerFileExtension("ndjson", "JSONEachRow"); + factory.registerInputFormat("JSONStringsEachRow", []( ReadBuffer & buf, const Block & sample, diff --git a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp index 044740d31dd..cf732d8c0f3 100644 --- a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp @@ -67,6 +67,7 @@ void registerOutputFormatMarkdown(FormatFactory & factory) }); factory.markOutputFormatSupportsParallelFormatting("Markdown"); + factory.registerFileExtension("md", "Markdown"); } } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index c56af536e15..2471a98f83d 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -486,6 +486,7 @@ void registerInputFormatMsgPack(FormatFactory & factory) { return std::make_shared(sample, buf, params); }); + factory.registerFileExtension("messagepack", "MsgPack"); } void registerMsgPackSchemaReader(FormatFactory & 
factory) diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 9a787e5a614..61511d634d3 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -26,6 +26,7 @@ ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_, const Chunk ORCBlockInputFormat::generate() { Chunk res; + block_missing_values.clear(); if (!file_reader) prepareReader(); @@ -52,6 +53,12 @@ Chunk ORCBlockInputFormat::generate() return res; arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. + if (format_settings.defaults_for_omitted_fields) + for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) + for (const auto & column_idx : missing_columns) + block_missing_values.setBit(column_idx, row_idx); return res; } @@ -62,6 +69,12 @@ void ORCBlockInputFormat::resetParser() file_reader.reset(); include_indices.clear(); + block_missing_values.clear(); +} + +const BlockMissingValues & ORCBlockInputFormat::getMissingValues() const +{ + return block_missing_values; } static size_t countIndicesForType(std::shared_ptr type) @@ -116,7 +129,9 @@ void ORCBlockInputFormat::prepareReader() if (is_stopped) return; - arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "ORC", format_settings.orc.import_nested); + arrow_column_to_ch_column = std::make_unique( + getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns); + missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); std::unordered_set nested_table_names; if (format_settings.orc.import_nested) diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 9b55747f552..bb136d02d6e 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -27,6 +27,8 @@ public: void resetParser() override; + const BlockMissingValues & getMissingValues() const override; + protected: Chunk generate() override; @@ -48,6 +50,9 @@ private: // indices of columns to read from ORC file std::vector include_indices; + std::vector missing_columns; + BlockMissingValues block_missing_values; + const FormatSettings format_settings; void prepareReader(); diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 4be91299bdb..213226c9d68 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -137,7 +137,8 @@ void ParallelParsingInputFormat::onBackgroundException(size_t offset) if (e->getLineNumber() != -1) e->setLineNumber(e->getLineNumber() + offset); } - tryLogCurrentException(__PRETTY_FUNCTION__); + if (is_server) + tryLogCurrentException(__PRETTY_FUNCTION__); parsing_finished = true; first_parser_finished.set(); reader_condvar.notify_all(); diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index 264beba8589..5efdaf1b0b7 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -82,6 +82,7 @@ public: String format_name; size_t max_threads; 
size_t min_chunk_bytes; + bool is_server; }; explicit ParallelParsingInputFormat(Params params) @@ -90,6 +91,7 @@ public: , file_segmentation_engine(params.file_segmentation_engine) , format_name(params.format_name) , min_chunk_bytes(params.min_chunk_bytes) + , is_server(params.is_server) , pool(params.max_threads) { // One unit for each thread, including segmentator and reader, plus a @@ -203,6 +205,8 @@ private: std::atomic parsing_started{false}; std::atomic parsing_finished{false}; + const bool is_server; + /// There are multiple "parsers", that's why we use thread pool. ThreadPool pool; /// Reading and segmentating the file diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 1d303014d31..3f0d9980573 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -40,6 +40,7 @@ ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_ Chunk ParquetBlockInputFormat::generate() { Chunk res; + block_missing_values.clear(); if (!file_reader) prepareReader(); @@ -59,6 +60,13 @@ Chunk ParquetBlockInputFormat::generate() ++row_group_current; arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. + if (format_settings.defaults_for_omitted_fields) + for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) + for (const auto & column_idx : missing_columns) + block_missing_values.setBit(column_idx, row_idx); return res; } @@ -69,6 +77,12 @@ void ParquetBlockInputFormat::resetParser() file_reader.reset(); column_indices.clear(); row_group_current = 0; + block_missing_values.clear(); +} + +const BlockMissingValues & ParquetBlockInputFormat::getMissingValues() const +{ + return block_missing_values; } static size_t countIndicesForType(std::shared_ptr type) @@ -118,7 +132,8 @@ void ParquetBlockInputFormat::prepareReader() row_group_total = file_reader->num_row_groups(); row_group_current = 0; - arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested); + arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns); + missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); std::unordered_set nested_table_names; if (format_settings.parquet.import_nested) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index dbc99c08a35..1faadaa3d21 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -24,6 +24,8 @@ public: String getName() const override { return "ParquetBlockInputFormat"; } + const BlockMissingValues & getMissingValues() const override; + private: Chunk generate() override; @@ -40,6 +42,8 @@ private: std::vector column_indices; std::unique_ptr arrow_column_to_ch_column; int row_group_current = 0; + std::vector missing_columns; + BlockMissingValues block_missing_values; const FormatSettings format_settings; std::atomic is_stopped{0}; diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 66da27e8829..395d8294aa5 100644 --- 
a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -95,6 +95,8 @@ void registerProtobufSchemaReader(FormatFactory & factory) { return std::make_shared(settings); }); + factory.registerFileExtension("pb", "Protobuf"); + factory.registerExternalSchemaReader("ProtobufSingle", [](const FormatSettings & settings) { return std::make_shared(settings); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index b58be3f5526..c9337929adc 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -178,12 +178,20 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx) try { bool read = true; - const auto & type = types[column_idx]; - const auto & serialization = serializations[column_idx]; - if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) - read = SerializationNullable::deserializeTextQuotedImpl(column, *buf, format_settings, serialization); + if (bool default_value = checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("DEFAULT", *buf); default_value) + { + column.insertDefault(); + read = false; + } else - serialization->deserializeTextQuoted(column, *buf, format_settings); + { + const auto & type = types[column_idx]; + const auto & serialization = serializations[column_idx]; + if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) + read = SerializationNullable::deserializeTextQuotedImpl(column, *buf, format_settings, serialization); + else + serialization->deserializeTextQuoted(column, *buf, format_settings); + } rollback_on_exception = true; diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 2a515fdf3be..01df264005b 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -27,6 +27,9 @@ public: using AggregatorList = std::list; using AggregatorListPtr = std::shared_ptr; +using AggregatorList = std::list; +using AggregatorListPtr = std::shared_ptr; + struct AggregatingTransformParams { Aggregator::Params params; diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index 7d0da3dca91..e79dcb34c41 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -139,8 +139,10 @@ void TTLTransform::finalize() if (delete_algorithm) { - size_t rows_removed = all_data_dropped ? 
data_part->rows_count : delete_algorithm->getNumberOfRemovedRows(); - LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", rows_removed, data_part->name); + if (all_data_dropped) + LOG_DEBUG(log, "Removed all rows from part {} due to expired TTL", data_part->name); + else + LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", delete_algorithm->getNumberOfRemovedRows(), data_part->name); } } diff --git a/src/Processors/examples/CMakeLists.txt b/src/Processors/examples/CMakeLists.txt index e69de29bb2d..6f78d611f45 100644 --- a/src/Processors/examples/CMakeLists.txt +++ b/src/Processors/examples/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable (comma_separated_streams comma_separated_streams.cpp) +target_link_libraries (comma_separated_streams PRIVATE dbms) \ No newline at end of file diff --git a/src/Processors/examples/comma_separated_streams.cpp b/src/Processors/examples/comma_separated_streams.cpp new file mode 100644 index 00000000000..a8c925354d9 --- /dev/null +++ b/src/Processors/examples/comma_separated_streams.cpp @@ -0,0 +1,118 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +int main() +try +{ + Block sample; + { + // a + ColumnWithTypeAndName col; + col.name = "a"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + { + // b + ColumnWithTypeAndName col; + col.name = "b"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + { + // c + ColumnWithTypeAndName col; + col.name = "c"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + { + // d + ColumnWithTypeAndName col; + col.name = "d"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + { + // e + ColumnWithTypeAndName col; + col.name = "e"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + { + // f + ColumnWithTypeAndName col; + col.name = "f"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + { + // g + ColumnWithTypeAndName col; + col.name = "g"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + { + // h + ColumnWithTypeAndName col; + col.name = "h"; + col.type = std::make_shared(); + sample.insert(std::move(col)); + } + + + ReadBufferFromFile in_buf("test_in"); + WriteBufferFromFile out_buf("test_out"); + + FormatSettings format_settings; + format_settings.with_names_use_header = true; + format_settings.skip_unknown_fields = true; + format_settings.csv.delimiter = '\x01'; + format_settings.hive_text.input_field_names = + { + "d", + "e", + "f", + "a", + "b", + "c", + "g", + "h", + "i", + "j", + }; + + RowInputFormatParams in_params{DEFAULT_INSERT_BLOCK_SIZE}; + InputFormatPtr input_format = std::make_shared(sample, in_buf, in_params, format_settings); + auto pipeline = QueryPipeline(std::move(input_format)); + auto reader = std::make_unique(pipeline); + + RowOutputFormatParams out_params; + OutputFormatPtr output_format = std::make_shared(out_buf, sample, true, true, out_params, format_settings); + Block res; + while (reader->pull(res)) + { + output_format->write(res); + } + return 0; +} +catch (...) 
+{ + std::cerr << getCurrentExceptionMessage(true) << '\n'; + return 1; +} diff --git a/src/Processors/examples/test_in b/src/Processors/examples/test_in new file mode 100644 index 00000000000..c7df97a26a6 --- /dev/null +++ b/src/Processors/examples/test_in @@ -0,0 +1,8 @@ +2021-09-14JPall20.0200 +2021-09-14CIall20.0100 +2021-09-14JMall40.25411 +2021-09-14MMall310.19354838709677422766 +2021-09-14TZAndroid30.3333333333333333311 +2021-09-14SGall80.25412 +2021-09-14PYall11.0001 +2021-09-14MXall10.0100 diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 673edfb6719..727781ea4a3 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -10,10 +10,7 @@ #include #include #include -#include #include -#include -#include #include #include #include @@ -27,7 +24,6 @@ #include #include #include -#include #include #include @@ -41,13 +37,11 @@ #include #include #include -#include #include #include #include #include -#include #include @@ -56,7 +50,6 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int CANNOT_PARSE_TEXT; extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 0c5d7d93689..e7da63c6927 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -212,10 +212,14 @@ KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSoc 0, global_context->getConfigRef().getUInt( "keeper_server.coordination_settings.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) - , session_timeout( + , min_session_timeout( 0, global_context->getConfigRef().getUInt( - "keeper_server.coordination_settings.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000) + "keeper_server.coordination_settings.min_session_timeout_ms", Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS) * 1000) + , max_session_timeout( + 0, + global_context->getConfigRef().getUInt( + "keeper_server.coordination_settings.session_timeout_ms", Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS) * 1000) , poll_wrapper(std::make_unique(socket_)) , responses(std::make_unique(std::numeric_limits::max())) , last_op(std::make_unique(EMPTY_LAST_OP)) @@ -227,9 +231,16 @@ void KeeperTCPHandler::sendHandshake(bool has_leader) { Coordination::write(Coordination::SERVER_HANDSHAKE_LENGTH, *out); if (has_leader) + { Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out); - else /// Specially ignore connections if we are not leader, client will throw exception - Coordination::write(42, *out); + } + else + { + /// Ignore connections if we are not leader, client will throw exception + /// and reconnect to another replica faster. ClickHouse client provide + /// clear message for such protocol version. 
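    /// (KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT presumably keeps the old magic value, 42,
    /// under a readable name, so rejected clients see the same bytes on the wire as before.)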
+ Coordination::write(Coordination::KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT, *out); + } Coordination::write(static_cast(session_timeout.totalMilliseconds()), *out); Coordination::write(session_id, *out); @@ -320,8 +331,10 @@ void KeeperTCPHandler::runImpl() int32_t handshake_length = header; auto client_timeout = receiveHandshake(handshake_length); - if (client_timeout != 0) - session_timeout = std::min(client_timeout, session_timeout); + if (client_timeout == 0) + client_timeout = Coordination::DEFAULT_SESSION_TIMEOUT_MS; + session_timeout = std::max(client_timeout, min_session_timeout); + session_timeout = std::min(session_timeout, max_session_timeout); } catch (const Exception & e) /// Typical for an incorrect username, password, or address. { diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index f98b269b8be..7953dfd2cbe 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -63,6 +63,8 @@ private: ContextPtr global_context; std::shared_ptr keeper_dispatcher; Poco::Timespan operation_timeout; + Poco::Timespan min_session_timeout; + Poco::Timespan max_session_timeout; Poco::Timespan session_timeout; int64_t session_id{-1}; Stopwatch session_stopwatch; diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp new file mode 100644 index 00000000000..711dfeebcae --- /dev/null +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -0,0 +1,223 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ +extern const Event ExternalDataSourceLocalCacheReadBytes; +} +namespace DB +{ +namespace fs = std::filesystem; +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +LocalFileHolder::LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_controller) : file_cache_controller(std::move(cache_controller)) +{ + file_buffer = file_cache_controller->value().allocFile(); + if (!file_buffer) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Create file readbuffer failed. 
{}", file_cache_controller->value().getLocalPath().string()); +} + +RemoteReadBuffer::RemoteReadBuffer(size_t buff_size) : BufferWithOwnMemory(buff_size) +{ +} + +std::unique_ptr RemoteReadBuffer::create( + ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr read_buffer, size_t buff_size) +{ + auto remote_path = remote_file_metadata->remote_path; + auto remote_read_buffer = std::make_unique(buff_size); + + std::tie(remote_read_buffer->local_file_holder, read_buffer) + = ExternalDataSourceCache::instance().createReader(context, remote_file_metadata, read_buffer); + if (remote_read_buffer->local_file_holder == nullptr) + return read_buffer; + remote_read_buffer->remote_file_size = remote_file_metadata->file_size; + return remote_read_buffer; +} + +bool RemoteReadBuffer::nextImpl() +{ + auto start_offset = local_file_holder->file_buffer->getPosition(); + auto end_offset = start_offset + local_file_holder->file_buffer->internalBuffer().size(); + local_file_holder->file_cache_controller->value().waitMoreData(start_offset, end_offset); + + auto status = local_file_holder->file_buffer->next(); + if (status) + { + BufferBase::set( + local_file_holder->file_buffer->buffer().begin(), + local_file_holder->file_buffer->buffer().size(), + local_file_holder->file_buffer->offset()); + ProfileEvents::increment(ProfileEvents::ExternalDataSourceLocalCacheReadBytes, local_file_holder->file_buffer->available()); + } + return status; +} + +off_t RemoteReadBuffer::seek(off_t offset, int whence) +{ + if (!local_file_holder->file_buffer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot call seek() in this buffer. It's a bug!"); + /* + * Need to wait here. For example, the current file has been download at position X, but here we try to seek to + * position Y (Y > X), it would fail. 
+ */ + auto & file_buffer = local_file_holder->file_buffer; + local_file_holder->file_cache_controller->value().waitMoreData(offset, offset + file_buffer->internalBuffer().size()); + auto ret = file_buffer->seek(offset, whence); + BufferBase::set(file_buffer->buffer().begin(), file_buffer->buffer().size(), file_buffer->offset()); + return ret; +} + +off_t RemoteReadBuffer::getPosition() +{ + return local_file_holder->file_buffer->getPosition(); +} + +ExternalDataSourceCache::ExternalDataSourceCache() = default; + +ExternalDataSourceCache::~ExternalDataSourceCache() +{ + recover_task_holder->deactivate(); +} + +ExternalDataSourceCache & ExternalDataSourceCache::instance() +{ + static ExternalDataSourceCache instance; + return instance; +} + +void ExternalDataSourceCache::recoverTask() +{ + std::vector invalid_paths; + for (auto const & group_dir : fs::directory_iterator{root_dir}) + { + for (auto const & cache_dir : fs::directory_iterator{group_dir.path()}) + { + String path = cache_dir.path(); + auto cache_controller = RemoteCacheController::recover(path); + if (!cache_controller) + { + invalid_paths.emplace_back(path); + continue; + } + auto cache_load_func = [&] { return cache_controller; }; + if (!lru_caches->getOrSet(path, cache_load_func)) + { + invalid_paths.emplace_back(path); + } + } + } + for (auto & path : invalid_paths) + fs::remove_all(path); + initialized = true; + LOG_INFO(log, "Recovered from directory:{}", root_dir); +} + +void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_) +{ + std::lock_guard lock(mutex); + if (isInitialized()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot initialize ExternalDataSourceCache twice"); + } + LOG_INFO( + log, "Initializing local cache for remote data sources. 
Local cache root path: {}, cache size limit: {}", root_dir_, limit_size_); + root_dir = root_dir_; + local_cache_bytes_read_before_flush = bytes_read_before_flush_; + lru_caches = std::make_unique(limit_size_); + + /// create if root_dir not exists + if (!fs::exists(fs::path(root_dir))) + { + fs::create_directories(fs::path(root_dir)); + } + + recover_task_holder = context->getSchedulePool().createTask("recover local cache metadata for remote files", [this] { recoverTask(); }); + recover_task_holder->activateAndSchedule(); +} + +String ExternalDataSourceCache::calculateLocalPath(IRemoteFileMetadataPtr metadata) const +{ + // add version into the full_path, and not block to read the new version + String full_path = metadata->getName() + ":" + metadata->remote_path + ":" + metadata->getVersion(); + UInt128 hashcode = sipHash128(full_path.c_str(), full_path.size()); + String hashcode_str = getHexUIntLowercase(hashcode); + return fs::path(root_dir) / hashcode_str.substr(0, 3) / hashcode_str; +} + +std::pair, std::unique_ptr> ExternalDataSourceCache::createReader( + ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer) +{ + // If something is wrong on startup, rollback to read from the original ReadBuffer + if (!isInitialized()) + { + LOG_ERROR(log, "ExternalDataSourceCache has not been initialized"); + return {nullptr, std::move(read_buffer)}; + } + + auto remote_path = remote_file_metadata->remote_path; + const auto & last_modification_timestamp = remote_file_metadata->last_modification_timestamp; + auto local_path = calculateLocalPath(remote_file_metadata); + std::lock_guard lock(mutex); + auto cache = lru_caches->get(local_path); + if (cache) + { + // the remote file has been updated, need to redownload + if (!cache->value().isValid() || cache->value().isModified(remote_file_metadata)) + { + LOG_TRACE( + log, + "Remote file ({}) has been updated. Last saved modification time: {}, actual last modification time: {}", + remote_path, + std::to_string(cache->value().getLastModificationTimestamp()), + std::to_string(last_modification_timestamp)); + cache->value().markInvalid(); + cache.reset(); + } + else + { + return {std::make_unique(std::move(cache)), nullptr}; + } + } + + if (!fs::exists(local_path)) + fs::create_directories(local_path); + + // cache is not found or is invalid, try to remove it at first + lru_caches->tryRemove(local_path); + + auto new_cache_controller + = std::make_shared(remote_file_metadata, local_path, local_cache_bytes_read_before_flush); + auto new_cache = lru_caches->getOrSet(local_path, [&] { return new_cache_controller; }); + if (!new_cache) + { + LOG_ERROR( + log, + "Insert the new cache failed. 
new file size:{}, current total size:{}", + remote_file_metadata->file_size, + lru_caches->weight()); + return {nullptr, std::move(read_buffer)}; + } + new_cache->value().startBackgroundDownload(std::move(read_buffer), context->getSchedulePool()); + return {std::make_unique(std::move(new_cache)), nullptr}; +} + +} diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h new file mode 100644 index 00000000000..3e2bbb05104 --- /dev/null +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -0,0 +1,97 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +using RemoteFileCacheType = LRUResourceCache; + +class LocalFileHolder +{ +public: + explicit LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_controller); + ~LocalFileHolder() = default; + + RemoteFileCacheType::MappedHolderPtr file_cache_controller; + std::unique_ptr file_buffer; +}; + +class RemoteReadBuffer : public BufferWithOwnMemory +{ +public: + explicit RemoteReadBuffer(size_t buff_size); + ~RemoteReadBuffer() override = default; + static std::unique_ptr create(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr read_buffer, size_t buff_size); + + bool nextImpl() override; + off_t seek(off_t off, int whence) override; + off_t getPosition() override; + std::optional getTotalSize() override { return remote_file_size; } + +private: + std::unique_ptr local_file_holder; + size_t remote_file_size = 0; +}; + + +class ExternalDataSourceCache : private boost::noncopyable +{ +public: + ~ExternalDataSourceCache(); + // global instance + static ExternalDataSourceCache & instance(); + + void initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_); + + inline bool isInitialized() const { return initialized; } + + std::pair, std::unique_ptr> + createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer); + + void updateTotalSize(size_t size) { total_size += size; } + +protected: + ExternalDataSourceCache(); + +private: + // root directory of local cache for remote filesystem + String root_dir; + size_t local_cache_bytes_read_before_flush = 0; + + std::atomic initialized = false; + std::atomic total_size; + std::mutex mutex; + std::unique_ptr lru_caches; + + Poco::Logger * log = &Poco::Logger::get("ExternalDataSourceCache"); + + String calculateLocalPath(IRemoteFileMetadataPtr meta) const; + + BackgroundSchedulePool::TaskHolder recover_task_holder; + void recoverTask(); +}; +} diff --git a/src/Storages/Cache/IRemoteFileMetadata.h b/src/Storages/Cache/IRemoteFileMetadata.h new file mode 100644 index 00000000000..0b07103b786 --- /dev/null +++ b/src/Storages/Cache/IRemoteFileMetadata.h @@ -0,0 +1,30 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace DB +{ +class IRemoteFileMetadata +{ +public: + virtual ~IRemoteFileMetadata() = default; + virtual String getName() const = 0; //class name + + // deserialize + virtual bool fromString(const String & buf) = 0; + // serialize + virtual String toString() const = 0; + + // used for comparing two file metadatas are the same or not. 
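+    // A version can be any string that changes whenever the remote file changes
+    // (e.g. a last-modified timestamp or a content hash); equal versions mean the
+    // cached copy can still be used, unequal versions trigger a re-download.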
+    virtual String getVersion() const = 0;
+
+    String remote_path;
+    size_t file_size = 0;
+    UInt64 last_modification_timestamp = 0;
+};
+
+using IRemoteFileMetadataPtr = std::shared_ptr<IRemoteFileMetadata>;
+}
diff --git a/src/Storages/Cache/RemoteCacheController.cpp b/src/Storages/Cache/RemoteCacheController.cpp
new file mode 100644
index 00000000000..b0bb31c09e7
--- /dev/null
+++ b/src/Storages/Cache/RemoteCacheController.cpp
@@ -0,0 +1,220 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace fs = std::filesystem;
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+    extern const int LOGICAL_ERROR;
+}
+
+std::shared_ptr<RemoteCacheController> RemoteCacheController::recover(const std::filesystem::path & local_path_)
+{
+    auto * log = &Poco::Logger::get("RemoteCacheController");
+
+    if (!std::filesystem::exists(local_path_ / "data.bin"))
+    {
+        LOG_TRACE(log, "Invalid cached directory: {}", local_path_.string());
+        return nullptr;
+    }
+
+    auto cache_controller = std::make_shared<RemoteCacheController>(nullptr, local_path_, 0);
+    if (cache_controller->file_status != DOWNLOADED)
+    {
+        // Do not load this invalid cached file; the actual cleanup happens in
+        // ExternalDataSourceCache::recoverTask(), because deleting directories while
+        // iterating over them causes unexpected behavior.
+        LOG_INFO(log, "Failed to recover cached file. Local path: {}", local_path_.string());
+        return nullptr;
+    }
+    try
+    {
+        cache_controller->file_metadata_ptr = RemoteFileMetadataFactory::instance().get(cache_controller->metadata_class);
+    }
+    catch (const Exception & e)
+    {
+        LOG_ERROR(log, "Failed to get metadata class {}: {}", cache_controller->metadata_class, e.message());
+        cache_controller->file_metadata_ptr = nullptr;
+    }
+    if (!cache_controller->file_metadata_ptr)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid metadata class: {}", cache_controller->metadata_class);
+    }
+    ReadBufferFromFile file_readbuffer((local_path_ / "metadata.txt").string());
+    std::string metadata_content;
+    readStringUntilEOF(metadata_content, file_readbuffer);
+    if (!cache_controller->file_metadata_ptr->fromString(metadata_content))
+    {
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Invalid metadata file ({}) for metadata class {}",
+            local_path_.string(),
+            cache_controller->metadata_class);
+    }
+
+    cache_controller->current_offset = fs::file_size(local_path_ / "data.bin");
+
+    ExternalDataSourceCache::instance().updateTotalSize(cache_controller->file_metadata_ptr->file_size);
+    return cache_controller;
+}
+
+RemoteCacheController::RemoteCacheController(
+    IRemoteFileMetadataPtr file_metadata_, const std::filesystem::path & local_path_, size_t cache_bytes_before_flush_)
+    : file_metadata_ptr(file_metadata_)
+    , local_path(local_path_)
+    , valid(true)
+    , local_cache_bytes_read_before_flush(cache_bytes_before_flush_)
+    , current_offset(0)
+{
+    // On recovery, file_metadata_ptr is null at first; it is filled in after metadata.txt is loaded.
+    // When a whole new file cache is allocated, file_metadata_ptr must not be null.
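+    // Illustrative summary of the two construction paths (a sketch, not additional API):
+    //   RemoteCacheController(metadata, path, bytes)  -- new cache entry: writes metadata.txt below
+    //   RemoteCacheController(nullptr, path, 0)       -- recovery path used by recover(): reads info.txt below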
+    if (file_metadata_ptr)
+    {
+        metadata_class = file_metadata_ptr->getName();
+        auto metadata_file_writer = std::make_unique<WriteBufferFromFile>((local_path_ / "metadata.txt").string());
+        auto str_buf = file_metadata_ptr->toString();
+        metadata_file_writer->write(str_buf.c_str(), str_buf.size());
+        metadata_file_writer->close();
+    }
+    else
+    {
+        auto info_path = local_path_ / "info.txt";
+        if (fs::exists(info_path))
+        {
+            std::ifstream info_file(info_path);
+            Poco::JSON::Parser info_parser;
+            auto info_json = info_parser.parse(info_file).extract<Poco::JSON::Object::Ptr>();
+            file_status = static_cast<LocalFileStatus>(info_json->get("file_status").convert<Int32>());
+            metadata_class = info_json->get("metadata_class").convert<String>();
+            info_file.close();
+        }
+    }
+}
+
+void RemoteCacheController::waitMoreData(size_t start_offset_, size_t end_offset_)
+{
+    std::unique_lock lock{mutex};
+    if (file_status == DOWNLOADED)
+    {
+        // Finished reading.
+        if (start_offset_ >= current_offset)
+        {
+            lock.unlock();
+            return;
+        }
+    }
+    else // Block until more data is ready.
+    {
+        if (current_offset >= end_offset_)
+        {
+            lock.unlock();
+            return;
+        }
+        else
+            more_data_signal.wait(lock, [this, end_offset_] { return file_status == DOWNLOADED || current_offset >= end_offset_; });
+    }
+    lock.unlock();
+}
+
+bool RemoteCacheController::isModified(IRemoteFileMetadataPtr file_metadata_)
+{
+    return file_metadata_ptr->getVersion() != file_metadata_->getVersion();
+}
+
+void RemoteCacheController::startBackgroundDownload(std::unique_ptr<ReadBuffer> in_readbuffer_, BackgroundSchedulePool & thread_pool)
+{
+    data_file_writer = std::make_unique<WriteBufferFromFile>((fs::path(local_path) / "data.bin").string());
+    flush(true);
+    ReadBufferPtr in_readbuffer(in_readbuffer_.release());
+    download_task_holder = thread_pool.createTask("download remote file", [this, in_readbuffer] { backgroundDownload(in_readbuffer); });
+    download_task_holder->activateAndSchedule();
+}
+
+void RemoteCacheController::backgroundDownload(ReadBufferPtr remote_read_buffer)
+{
+    file_status = DOWNLOADING;
+    size_t before_unflush_bytes = 0;
+    size_t total_bytes = 0;
+    while (!remote_read_buffer->eof())
+    {
+        size_t bytes = remote_read_buffer->available();
+
+        data_file_writer->write(remote_read_buffer->position(), bytes);
+        remote_read_buffer->position() += bytes;
+        total_bytes += bytes;
+        before_unflush_bytes += bytes;
+        if (before_unflush_bytes >= local_cache_bytes_read_before_flush)
+        {
+            std::unique_lock lock(mutex);
+            current_offset += total_bytes;
+            total_bytes = 0;
+            flush();
+            lock.unlock();
+            more_data_signal.notify_all();
+            before_unflush_bytes = 0;
+        }
+    }
+    std::unique_lock lock(mutex);
+    current_offset += total_bytes;
+    file_status = DOWNLOADED;
+    flush(true);
+    data_file_writer.reset();
+    lock.unlock();
+    more_data_signal.notify_all();
+    ExternalDataSourceCache::instance().updateTotalSize(file_metadata_ptr->file_size);
+    LOG_TRACE(log, "Finished downloading into local path: {}, file metadata: {}", local_path.string(), file_metadata_ptr->toString());
+}
+
+void RemoteCacheController::flush(bool need_flush_status)
+{
+    if (data_file_writer)
+    {
+        data_file_writer->sync();
+    }
+    if (need_flush_status)
+    {
+        auto file_writer = std::make_unique<WriteBufferFromFile>(local_path / "info.txt");
+        Poco::JSON::Object jobj;
+        jobj.set("file_status", static_cast<Int32>(file_status));
+        jobj.set("metadata_class", metadata_class);
+        std::stringstream buf; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+        jobj.stringify(buf);
+        file_writer->write(buf.str().c_str(), buf.str().size());
+        file_writer->close();
+    }
+}
+
+RemoteCacheController::~RemoteCacheController()
+{
+    if (download_task_holder)
+        download_task_holder->deactivate();
+}
+
+void RemoteCacheController::close()
+{
+    // Delete the cache directory.
+    LOG_TRACE(log, "Removing the local cache. Local path: {}", local_path.string());
+    if (fs::exists(local_path))
+        fs::remove_all(local_path);
+}
+
+std::unique_ptr<ReadBufferFromFileBase> RemoteCacheController::allocFile()
+{
+    ReadSettings settings;
+    //settings.local_fs_method = LocalFSReadMethod::read;
+    auto file_buffer = createReadBufferFromFileBase((local_path / "data.bin").string(), settings);
+
+    return file_buffer;
+}
+
+}
diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h
new file mode 100644
index 00000000000..6047dbd5eb4
--- /dev/null
+++ b/src/Storages/Cache/RemoteCacheController.h
@@ -0,0 +1,97 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+class RemoteCacheController
+{
+public:
+    enum LocalFileStatus
+    {
+        TO_DOWNLOAD = 0,
+        DOWNLOADING = 1,
+        DOWNLOADED = 2,
+    };
+
+    RemoteCacheController(
+        IRemoteFileMetadataPtr file_metadata_, const std::filesystem::path & local_path_, size_t cache_bytes_before_flush_);
+    ~RemoteCacheController();
+
+    // Recover from local disk.
+    static std::shared_ptr<RemoteCacheController> recover(const std::filesystem::path & local_path);
+
+    /**
+     * Allocate a read buffer over the locally cached data file (data.bin).
+     * close() is the counterpart that removes the cached data from disk;
+     * it is invoked by the cache release function when an entry is evicted.
+     */
+    std::unique_ptr<ReadBufferFromFileBase> allocFile();
+    void close();
+
+    /**
+     * Called on the read path; blocks until enough data has been downloaded.
+     * If the file has already been fully downloaded, it returns immediately.
+     */
+    void waitMoreData(size_t start_offset_, size_t end_offset_);
+
+    inline size_t size() const { return current_offset; }
+
+    inline const std::filesystem::path & getLocalPath() { return local_path; }
+    inline String getRemotePath() const { return file_metadata_ptr->remote_path; }
+
+    inline UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; }
+    bool isModified(IRemoteFileMetadataPtr file_metadata_);
+    inline void markInvalid()
+    {
+        std::lock_guard lock(mutex);
+        valid = false;
+    }
+    inline bool isValid()
+    {
+        std::lock_guard lock(mutex);
+        return valid;
+    }
+    IRemoteFileMetadataPtr getFileMetadata() { return file_metadata_ptr; }
+    inline size_t getFileSize() const { return file_metadata_ptr->file_size; }
+
+    void startBackgroundDownload(std::unique_ptr<ReadBuffer> in_readbuffer_, BackgroundSchedulePool & thread_pool);
+
+private:
+    // Flush the data file and, optionally, the status information.
+    void flush(bool need_flush_status = false);
+
+    BackgroundSchedulePool::TaskHolder download_task_holder;
+    void backgroundDownload(ReadBufferPtr remote_read_buffer);
+
+    std::mutex mutex;
+    std::condition_variable more_data_signal;
+
+    String metadata_class;
+    LocalFileStatus file_status = TO_DOWNLOAD; // For tracking download progress.
+    IRemoteFileMetadataPtr file_metadata_ptr;
+    std::filesystem::path local_path;
+
+    bool valid = true;
+    size_t local_cache_bytes_read_before_flush;
+    size_t current_offset;
+
+    //std::shared_ptr remote_read_buffer;
+    std::unique_ptr<WriteBufferFromFile> data_file_writer;
+
+    Poco::Logger * log = &Poco::Logger::get("RemoteCacheController");
+};
+using RemoteCacheControllerPtr = std::shared_ptr<RemoteCacheController>;
+
+}
diff --git a/src/Storages/Cache/RemoteFileCachePolicy.h b/src/Storages/Cache/RemoteFileCachePolicy.h
new file mode 100644
index 00000000000..7d742d6ea14
--- /dev/null
+++
b/src/Storages/Cache/RemoteFileCachePolicy.h @@ -0,0 +1,17 @@ +#pragma once +namespace DB +{ +struct RemoteFileCacheWeightFunction +{ + size_t operator()(const RemoteCacheController & cache) const { return cache.getFileSize(); } +}; + +struct RemoteFileCacheReleaseFunction +{ + void operator()(std::shared_ptr controller) + { + if (controller) + controller->close(); + } +}; +} diff --git a/src/Storages/Cache/RemoteFileMetadataFactory.cpp b/src/Storages/Cache/RemoteFileMetadataFactory.cpp new file mode 100644 index 00000000000..a123d67088b --- /dev/null +++ b/src/Storages/Cache/RemoteFileMetadataFactory.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +RemoteFileMetadataFactory & RemoteFileMetadataFactory::instance() +{ + static RemoteFileMetadataFactory g_factory; + return g_factory; +} + +IRemoteFileMetadataPtr RemoteFileMetadataFactory::get(const String & name) +{ + auto it = remote_file_metadatas.find(name); + if (it == remote_file_metadatas.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found metadata class:{}", name); + return (it->second)(); +} + +void RemoteFileMetadataFactory::registerRemoteFileMatadata(const String & name, MetadataCreator creator) +{ + auto it = remote_file_metadatas.find(name); + if (it != remote_file_metadatas.end()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Metadata class ({}) has already been registered.", name); + } + remote_file_metadatas[name] = creator; +} +} diff --git a/src/Storages/Cache/RemoteFileMetadataFactory.h b/src/Storages/Cache/RemoteFileMetadataFactory.h new file mode 100644 index 00000000000..2cd46ca0682 --- /dev/null +++ b/src/Storages/Cache/RemoteFileMetadataFactory.h @@ -0,0 +1,29 @@ +#pragma once +#include +#include +#include +#include +namespace DB +{ + +class RemoteFileMetadataFactory : private boost::noncopyable +{ +public: + using MetadataCreator = std::function; + + ~RemoteFileMetadataFactory() = default; + + static RemoteFileMetadataFactory & instance(); + + IRemoteFileMetadataPtr get(const String & name); + + void registerRemoteFileMatadata(const String &name, MetadataCreator creator); + +protected: + RemoteFileMetadataFactory() = default; + +private: + std::unordered_map remote_file_metadatas; +}; + +} diff --git a/src/Storages/Cache/registerRemoteFileMetadatas.cpp b/src/Storages/Cache/registerRemoteFileMetadatas.cpp new file mode 100644 index 00000000000..39705b810b7 --- /dev/null +++ b/src/Storages/Cache/registerRemoteFileMetadatas.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + +namespace DB +{ + +#if USE_HIVE +void registerStorageHiveMetadata(RemoteFileMetadataFactory & factory); +#endif + +void registerRemoteFileMetadatas() +{ + [[maybe_unused]] auto & factory = RemoteFileMetadataFactory::instance(); + +#if USE_HIVE + registerStorageHiveMetadata(factory); +#endif +} + +} diff --git a/src/Storages/Cache/registerRemoteFileMetadatas.h b/src/Storages/Cache/registerRemoteFileMetadatas.h new file mode 100644 index 00000000000..a7818b663c9 --- /dev/null +++ b/src/Storages/Cache/registerRemoteFileMetadatas.h @@ -0,0 +1,6 @@ +#pragma once + +namespace DB +{ +void registerRemoteFileMetadatas(); +} diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index 926ad64b515..1e08b088b1d 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -98,7 +98,7 @@ 
getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfigura struct URLBasedDataSourceConfiguration { String url; - String format; + String format = "auto"; String compression_method = "auto"; String structure = "auto"; diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 8aceed05b72..c99d100d6d4 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -199,6 +199,21 @@ HDFSFSPtr createHDFSFS(hdfsBuilder * builder) return fs; } +String getNameNodeUrl(const String & hdfs_url) +{ + const size_t pos = hdfs_url.find('/', hdfs_url.find("//") + 2); + String namenode_url = hdfs_url.substr(0, pos) + "/"; + return namenode_url; +} + +String getNameNodeCluster(const String &hdfs_url) +{ + auto pos1 = hdfs_url.find("//") + 2; + auto pos2 = hdfs_url.find('/', pos1); + + return hdfs_url.substr(pos1, pos2 - pos1); +} + void checkHDFSURL(const String & url) { if (!re2::RE2::FullMatch(url, HDFS_URL_REGEXP)) diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/HDFS/HDFSCommon.h index 82127beb520..31a8df74652 100644 --- a/src/Storages/HDFS/HDFSCommon.h +++ b/src/Storages/HDFS/HDFSCommon.h @@ -98,6 +98,10 @@ using HDFSFSPtr = std::unique_ptr, detail::HDFSFsD HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::AbstractConfiguration &); HDFSFSPtr createHDFSFS(hdfsBuilder * builder); + +String getNameNodeUrl(const String & hdfs_url); +String getNameNodeCluster(const String & hdfs_url); + /// Check that url satisfy structure 'hdfs://:/' /// and throw exception if it doesn't; void checkHDFSURL(const String & url); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 429599a5d3d..82a7df665ee 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -547,17 +547,23 @@ void registerStorageHDFS(StorageFactory & factory) { ASTs & engine_args = args.engine_args; - if (engine_args.size() != 2 && engine_args.size() != 3) + if (engine_args.empty() || engine_args.size() > 3) throw Exception( - "Storage HDFS requires 2 or 3 arguments: url, name of used format and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + "Storage HDFS requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); String url = engine_args[0]->as().value.safeGet(); - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); + String format_name = "auto"; + if (engine_args.size() > 1) + { + engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); + format_name = engine_args[1]->as().value.safeGet(); + } - String format_name = engine_args[1]->as().value.safeGet(); + if (format_name == "auto") + format_name = FormatFactory::instance().getFormatFromFileName(url, true); String compression_method; if (engine_args.size() == 3) diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp new file mode 100644 index 00000000000..aa19ff042e2 --- /dev/null +++ b/src/Storages/Hive/HiveCommon.cpp @@ -0,0 +1,244 @@ +#include + +#if USE_HIVE + +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NO_HIVEMETASTORE; + extern const int BAD_ARGUMENTS; +} + +bool 
HiveMetastoreClient::shouldUpdateTableMetadata( + const String & db_name, const String & table_name, const std::vector & partitions) +{ + String cache_key = getCacheKey(db_name, table_name); + HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key); + if (!metadata) + return true; + + auto old_partiton_infos = metadata->getPartitionInfos(); + if (old_partiton_infos.size() != partitions.size()) + return true; + + for (const auto & partition : partitions) + { + auto it = old_partiton_infos.find(partition.sd.location); + if (it == old_partiton_infos.end()) + return true; + + const auto & old_partition_info = it->second; + if (!old_partition_info.haveSameParameters(partition)) + return true; + } + return false; +} + +HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata(const String & db_name, const String & table_name) +{ + LOG_TRACE(log, "Get table metadata for {}.{}", db_name, table_name); + std::lock_guard lock{mutex}; + + auto table = std::make_shared(); + std::vector partitions; + try + { + client->get_table(*table, db_name, table_name); + + /// Query the latest partition info to check new change. + client->get_partitions(partitions, db_name, table_name, -1); + } + catch (apache::thrift::transport::TTransportException & e) + { + setExpired(); + throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", String(e.what())); + } + + bool update_cache = shouldUpdateTableMetadata(db_name, table_name, partitions); + String cache_key = getCacheKey(db_name, table_name); + + HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key); + + if (update_cache) + { + LOG_INFO(log, "Reload hive partition metadata info for {}.{}", db_name, table_name); + + /// Generate partition infos from partitions and old partition infos(if exists). 
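+        /// A partition whose parameters are unchanged keeps its already-listed file set;
+        /// new, changed, or never-initialized partitions get a fresh PartitionInfo, and
+        /// their files are listed again on the next getFilesByLocation() call.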
+        std::map<String, PartitionInfo> new_partition_infos;
+        if (metadata)
+        {
+            auto & old_partition_infos = metadata->getPartitionInfos();
+            for (const auto & partition : partitions)
+            {
+                auto it = old_partition_infos.find(partition.sd.location);
+                if (it == old_partition_infos.end() || !it->second.haveSameParameters(partition) || !it->second.initialized)
+                {
+                    new_partition_infos.emplace(partition.sd.location, PartitionInfo(partition));
+                    continue;
+                }
+                else
+                {
+                    // The partition is unchanged: keep its already-listed file set.
+                    PartitionInfo new_partition_info(partition);
+                    new_partition_info.files = std::move(it->second.files);
+                    new_partition_info.initialized = true;
+                    new_partition_infos.emplace(partition.sd.location, std::move(new_partition_info));
+                }
+            }
+        }
+        else
+        {
+            for (const auto & partition : partitions)
+                new_partition_infos.emplace(partition.sd.location, PartitionInfo(partition));
+        }
+
+        metadata = std::make_shared<HiveTableMetadata>(
+            db_name, table_name, table, std::move(new_partition_infos), getContext());
+        table_metadata_cache.set(cache_key, metadata);
+    }
+    return metadata;
+}
+
+void HiveMetastoreClient::clearTableMetadata(const String & db_name, const String & table_name)
+{
+    String cache_key = getCacheKey(db_name, table_name);
+
+    std::lock_guard lock{mutex};
+    HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key);
+    if (metadata)
+        table_metadata_cache.remove(cache_key);
+}
+
+void HiveMetastoreClient::setClient(std::shared_ptr<Apache::Hadoop::Hive::ThriftHiveMetastoreClient> client_)
+{
+    std::lock_guard lock{mutex};
+    client = client_;
+    clearExpired();
+}
+
+bool HiveMetastoreClient::PartitionInfo::haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const
+{
+    /// Parameters include the keys: numRows, numFiles, rawDataSize, totalSize, transient_lastDdlTime.
+    auto it1 = partition.parameters.cbegin();
+    auto it2 = other.parameters.cbegin();
+    for (; it1 != partition.parameters.cend() && it2 != other.parameters.cend(); ++it1, ++it2)
+    {
+        if (it1->first != it2->first || it1->second != it2->second)
+            return false;
+    }
+    return (it1 == partition.parameters.cend() && it2 == other.parameters.cend());
+}
+
+std::vector<Apache::Hadoop::Hive::Partition> HiveMetastoreClient::HiveTableMetadata::getPartitions() const
+{
+    std::vector<Apache::Hadoop::Hive::Partition> result;
+
+    std::lock_guard lock{mutex};
+    for (const auto & partition_info : partition_infos)
+        result.emplace_back(partition_info.second.partition);
+    return result;
+}
+
+std::vector<HiveMetastoreClient::FileInfo> HiveMetastoreClient::HiveTableMetadata::getFilesByLocation(const HDFSFSPtr & fs, const String & location)
+{
+    LOG_TRACE(log, "List directory {}", location);
+    std::map<String, PartitionInfo>::iterator it;
+    if (!empty_partition_keys)
+    {
+        std::lock_guard lock{mutex};
+        it = partition_infos.find(location);
+        if (it == partition_infos.end())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid location {}", location);
+
+        if (it->second.initialized)
+        {
+            LOG_TRACE(log, "Got {} files under directory {}", it->second.files.size(), location);
+            return it->second.files;
+        }
+    }
+
+    Poco::URI location_uri(location);
+    HDFSFileInfo ls;
+    ls.file_info = hdfsListDirectory(fs.get(), location_uri.getPath().c_str(), &ls.length);
+    std::vector<FileInfo> result;
+    for (int i = 0; i < ls.length; ++i)
+    {
+        auto & file_info = ls.file_info[i];
+        if (file_info.mKind != 'D' && file_info.mSize > 0)
+            result.emplace_back(String(file_info.mName), file_info.mLastMod, file_info.mSize);
+    }
+
+    if (!empty_partition_keys)
+    {
+        std::lock_guard lock{mutex};
+        it = partition_infos.find(location);
+        if (it == partition_infos.end())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid location {}", location);
+
+        it->second.files = result;
+        it->second.initialized = true;
+    }
+    LOG_TRACE(log, "Got {} files under directory {}", result.size(), location);
+    return result;
+} + +HiveMetastoreClientFactory & HiveMetastoreClientFactory::instance() +{ + static HiveMetastoreClientFactory factory; + return factory; +} + +HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & name, ContextPtr context) +{ + using namespace apache::thrift; + using namespace apache::thrift::protocol; + using namespace apache::thrift::transport; + using namespace Apache::Hadoop::Hive; + + std::lock_guard lock(mutex); + auto it = clients.find(name); + if (it == clients.end() || it->second->isExpired()) + { + /// Connect to hive metastore + Poco::URI hive_metastore_url(name); + const auto & host = hive_metastore_url.getHost(); + auto port = hive_metastore_url.getPort(); + + std::shared_ptr socket = std::make_shared(host, port); + socket->setKeepAlive(true); + socket->setConnTimeout(conn_timeout_ms); + socket->setRecvTimeout(recv_timeout_ms); + socket->setSendTimeout(send_timeout_ms); + std::shared_ptr transport(new TBufferedTransport(socket)); + std::shared_ptr protocol(new TBinaryProtocol(transport)); + std::shared_ptr thrift_client = std::make_shared(protocol); + try + { + transport->open(); + } + catch (TException & tx) + { + throw Exception("connect to hive metastore:" + name + " failed." + tx.what(), ErrorCodes::BAD_ARGUMENTS); + } + + if (it == clients.end()) + { + HiveMetastoreClientPtr client = std::make_shared(std::move(thrift_client), context); + clients[name] = client; + return client; + } + else + { + it->second->setClient(std::move(thrift_client)); + return it->second; + } + } + return it->second; +} + +} +#endif diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h new file mode 100644 index 00000000000..e88e67b0257 --- /dev/null +++ b/src/Storages/Hive/HiveCommon.h @@ -0,0 +1,143 @@ +#pragma once + +#include + +#if USE_HIVE + +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +class HiveMetastoreClient : public WithContext +{ +public: + + struct FileInfo + { + String path; + UInt64 last_modify_time; /// In ms + size_t size; + + FileInfo() = default; + FileInfo(const String & path_, UInt64 last_modify_time_, size_t size_) + : path(path_), last_modify_time(last_modify_time_), size(size_) + { + } + }; + + struct PartitionInfo + { + Apache::Hadoop::Hive::Partition partition; + std::vector files; + bool initialized = false; /// If true, files are initialized. + + explicit PartitionInfo(const Apache::Hadoop::Hive::Partition & partition_): partition(partition_) {} + bool haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const; + }; + + + /// Used for speeding up metadata query process. 
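+    /// A minimal usage sketch (the metastore URL below is illustrative):
+    ///     auto client = HiveMetastoreClientFactory::instance().getOrCreate("thrift://hivemetastore:9083", context);
+    ///     auto metadata = client->getTableMetadata("db", "table");
+    ///     auto partitions = metadata->getPartitions();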
+ struct HiveTableMetadata : public WithContext + { + public: + HiveTableMetadata( + const String & db_name_, + const String & table_name_, + std::shared_ptr table_, + const std::map & partition_infos_, + ContextPtr context_) + : WithContext(context_) + , db_name(db_name_) + , table_name(table_name_) + , table(table_) + , partition_infos(partition_infos_) + , empty_partition_keys(table->partitionKeys.empty()) + { + } + + + std::map & getPartitionInfos() + { + std::lock_guard lock{mutex}; + return partition_infos; + } + + std::shared_ptr getTable() const + { + std::lock_guard lock{mutex}; + return table; + } + + std::vector getPartitions() const; + + std::vector getFilesByLocation(const HDFSFSPtr & fs, const String & location); + + private: + String db_name; + String table_name; + + mutable std::mutex mutex; + std::shared_ptr table; + std::map partition_infos; + const bool empty_partition_keys; + + Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); + }; + + using HiveTableMetadataPtr = std::shared_ptr; + + explicit HiveMetastoreClient(std::shared_ptr client_, ContextPtr context_) + : WithContext(context_), client(client_), table_metadata_cache(1000) + { + } + + HiveTableMetadataPtr getTableMetadata(const String & db_name, const String & table_name); + void clearTableMetadata(const String & db_name, const String & table_name); + void setClient(std::shared_ptr client_); + bool isExpired() const { return expired; } + void setExpired() { expired = true; } + void clearExpired() { expired = false; } + +private: + static String getCacheKey(const String & db_name, const String & table_name) { return db_name + "." + table_name; } + + bool shouldUpdateTableMetadata( + const String & db_name, const String & table_name, const std::vector & partitions); + + std::shared_ptr client; + LRUCache table_metadata_cache; + mutable std::mutex mutex; + std::atomic expired{false}; + + Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); +}; + +using HiveMetastoreClientPtr = std::shared_ptr; +class HiveMetastoreClientFactory final : private boost::noncopyable +{ +public: + static HiveMetastoreClientFactory & instance(); + + HiveMetastoreClientPtr getOrCreate(const String & name, ContextPtr context); + +private: + std::mutex mutex; + std::map clients; + + const int conn_timeout_ms = 10000; + const int recv_timeout_ms = 10000; + const int send_timeout_ms = 10000; +}; + +} + + +#endif diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp new file mode 100644 index 00000000000..b0cfa9809e1 --- /dev/null +++ b/src/Storages/Hive/HiveFile.cpp @@ -0,0 +1,182 @@ +#include + +#if USE_HIVE + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +namespace DB +{ + +template +Range createRangeFromOrcStatistics(const StatisticsType * stats) +{ + /// We must check if there are minimum or maximum values in statistics in case of + /// null values or NaN/Inf values of double type. 
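+    /// (A column that contains only nulls, for instance, has neither a minimum nor a
+    /// maximum in the ORC statistics, so each bound has to be checked separately.)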
+ if (stats->hasMinimum() && stats->hasMaximum()) + { + return Range(FieldType(stats->getMinimum()), true, FieldType(stats->getMaximum()), true); + } + else if (stats->hasMinimum()) + { + return Range::createLeftBounded(FieldType(stats->getMinimum()), true); + } + else if (stats->hasMaximum()) + { + return Range::createRightBounded(FieldType(stats->getMaximum()), true); + } + else + { + return Range(); + } +} + +template +Range createRangeFromParquetStatistics(std::shared_ptr stats) +{ + /// We must check if there are minimum or maximum values in statistics in case of + /// null values or NaN/Inf values of double type. + if (!stats->HasMinMax()) + return Range(); + return Range(FieldType(stats->min()), true, FieldType(stats->max()), true); +} + +Range createRangeFromParquetStatistics(std::shared_ptr stats) +{ + if (!stats->HasMinMax()) + return Range(); + String min_val(reinterpret_cast(stats->min().ptr), stats->min().len); + String max_val(reinterpret_cast(stats->max().ptr), stats->max().len); + return Range(min_val, true, max_val, true); +} + +Range HiveOrcFile::buildRange(const orc::ColumnStatistics * col_stats) +{ + if (!col_stats || col_stats->hasNull()) + return {}; + + if (const auto * int_stats = dynamic_cast(col_stats)) + { + return createRangeFromOrcStatistics(int_stats); + } + else if (const auto * double_stats = dynamic_cast(col_stats)) + { + return createRangeFromOrcStatistics(double_stats); + } + else if (const auto * string_stats = dynamic_cast(col_stats)) + { + return createRangeFromOrcStatistics(string_stats); + } + else if (const auto * bool_stats = dynamic_cast(col_stats)) + { + auto false_cnt = bool_stats->getFalseCount(); + auto true_cnt = bool_stats->getTrueCount(); + if (false_cnt && true_cnt) + { + return Range(UInt8(0), true, UInt8(1), true); + } + else if (false_cnt) + { + return Range::createLeftBounded(UInt8(0), true); + } + else if (true_cnt) + { + return Range::createRightBounded(UInt8(1), true); + } + } + else if (const auto * timestamp_stats = dynamic_cast(col_stats)) + { + return createRangeFromOrcStatistics(timestamp_stats); + } + else if (const auto * date_stats = dynamic_cast(col_stats)) + { + return createRangeFromOrcStatistics(date_stats); + } + return {}; +} + +void HiveOrcFile::prepareReader() +{ + // TODO To be implemented + throw Exception("Unimplemented HiveOrcFile::prepareReader", ErrorCodes::NOT_IMPLEMENTED); +} + +void HiveOrcFile::prepareColumnMapping() +{ + // TODO To be implemented + throw Exception("Unimplemented HiveOrcFile::prepareColumnMapping", ErrorCodes::NOT_IMPLEMENTED); +} + +bool HiveOrcFile::hasMinMaxIndex() const +{ + return false; +} + + +std::unique_ptr HiveOrcFile::buildMinMaxIndex(const orc::Statistics * /*statistics*/) +{ + // TODO To be implemented + throw Exception("Unimplemented HiveOrcFile::buildMinMaxIndex", ErrorCodes::NOT_IMPLEMENTED); +} + + +void HiveOrcFile::loadMinMaxIndex() +{ + // TODO To be implemented + throw Exception("Unimplemented HiveOrcFile::loadMinMaxIndex", ErrorCodes::NOT_IMPLEMENTED); +} + +bool HiveOrcFile::hasSubMinMaxIndex() const +{ + // TODO To be implemented + return false; +} + + +void HiveOrcFile::loadSubMinMaxIndex() +{ + // TODO To be implemented + throw Exception("Unimplemented HiveOrcFile::loadSubMinMaxIndex", ErrorCodes::NOT_IMPLEMENTED); +} + +bool HiveParquetFile::hasSubMinMaxIndex() const +{ + // TODO To be implemented + return false; +} + +void HiveParquetFile::prepareReader() +{ + // TODO To be implemented + throw Exception("Unimplemented HiveParquetFile::prepareReader", 
ErrorCodes::NOT_IMPLEMENTED); +} + + +void HiveParquetFile::loadSubMinMaxIndex() +{ + // TODO To be implemented + throw Exception("Unimplemented HiveParquetFile::loadSubMinMaxIndex", ErrorCodes::NOT_IMPLEMENTED); +} + +} +#endif diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h new file mode 100644 index 00000000000..63cca2562eb --- /dev/null +++ b/src/Storages/Hive/HiveFile.h @@ -0,0 +1,269 @@ +#pragma once +#include + +#if USE_HIVE + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace orc +{ +class Reader; +} + +namespace parquet +{ +class ParquetFileReader; +namespace arrow +{ + class FileReader; +} +} + +namespace arrow +{ +namespace io +{ + class RandomAccessFile; +} + +class Buffer; +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class IHiveFile : public WithContext +{ +public: + using MinMaxIndex = IMergeTreeDataPart::MinMaxIndex; + using MinMaxIndexPtr = std::shared_ptr; + + enum class FileFormat + { + RC_FILE, + TEXT, + LZO_TEXT, + SEQUENCE_FILE, + AVRO, + PARQUET, + ORC, + }; + + inline static const String RCFILE_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; + inline static const String TEXT_INPUT_FORMAT = "org.apache.hadoop.mapred.TextInputFormat"; + inline static const String LZO_TEXT_INPUT_FORMAT = "com.hadoop.mapred.DeprecatedLzoTextInputFormat"; + inline static const String SEQUENCE_INPUT_FORMAT = "org.apache.hadoop.mapred.SequenceFileInputFormat"; + inline static const String PARQUET_INPUT_FORMAT = "com.cloudera.impala.hive.serde.ParquetInputFormat"; + inline static const String MR_PARQUET_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"; + inline static const String AVRO_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat"; + inline static const String ORC_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"; + inline static const std::map VALID_HDFS_FORMATS = { + {RCFILE_INPUT_FORMAT, FileFormat::RC_FILE}, + {TEXT_INPUT_FORMAT, FileFormat::TEXT}, + {LZO_TEXT_INPUT_FORMAT, FileFormat::LZO_TEXT}, + {SEQUENCE_INPUT_FORMAT, FileFormat::SEQUENCE_FILE}, + {PARQUET_INPUT_FORMAT, FileFormat::PARQUET}, + {MR_PARQUET_INPUT_FORMAT, FileFormat::PARQUET}, + {AVRO_INPUT_FORMAT, FileFormat::AVRO}, + {ORC_INPUT_FORMAT, FileFormat::ORC}, + }; + + static inline bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.count(format_class) > 0; } + static inline FileFormat toFileFormat(const String & format_class) + { + if (isFormatClass(format_class)) + { + return VALID_HDFS_FORMATS.find(format_class)->second; + } + throw Exception("Unsupported hdfs file format " + format_class, ErrorCodes::NOT_IMPLEMENTED); + } + + IHiveFile( + const FieldVector & values_, + const String & namenode_url_, + const String & path_, + UInt64 last_modify_time_, + size_t size_, + const NamesAndTypesList & index_names_and_types_, + const std::shared_ptr & storage_settings_, + ContextPtr context_) + : WithContext(context_) + , partition_values(values_) + , namenode_url(namenode_url_) + , path(path_) + , last_modify_time(last_modify_time_) + , size(size_) + , index_names_and_types(index_names_and_types_) + , storage_settings(storage_settings_) + { + } + virtual ~IHiveFile() = default; + + virtual FileFormat getFormat() const = 0; + + virtual String getName() const = 0; + + virtual String getPath() const { return path; } + + virtual FieldVector getPartitionValues() const { return partition_values; } 
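+    // For a file stored under .../table/day=2021-01-01/hour=12/ (an illustrative layout),
+    // the partition values would be the fields {"2021-01-01", "12"}, in partition-key order.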
+ + virtual String getNamenodeUrl() { return namenode_url; } + + virtual bool hasMinMaxIndex() const { return false; } + + virtual void loadMinMaxIndex() + { + throw Exception("Method loadMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + virtual MinMaxIndexPtr getMinMaxIndex() const { return minmax_idx; } + + // Do hive file contains sub-file level minmax index? + virtual bool hasSubMinMaxIndex() const { return false; } + + virtual void loadSubMinMaxIndex() + { + throw Exception("Method loadSubMinMaxIndex is not supported by hive file:" + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + virtual const std::vector & getSubMinMaxIndexes() const { return sub_minmax_idxes; } + + virtual void setSkipSplits(const std::set & splits) { skip_splits = splits; } + + virtual const std::set & getSkipSplits() const { return skip_splits; } + + inline std::string describeMinMaxIndex(const MinMaxIndexPtr & idx) const + { + if (!idx) + return ""; + + std::vector strs; + strs.reserve(index_names_and_types.size()); + size_t i = 0; + for (const auto & name_type : index_names_and_types) + { + strs.push_back(name_type.name + ":" + name_type.type->getName() + idx->hyperrectangle[i++].toString()); + } + return boost::algorithm::join(strs, "|"); + } + + inline UInt64 getLastModTs() const { return last_modify_time; } + inline size_t getSize() const { return size; } + +protected: + FieldVector partition_values; + String namenode_url; + String path; + UInt64 last_modify_time; + size_t size; + NamesAndTypesList index_names_and_types; + MinMaxIndexPtr minmax_idx; + std::vector sub_minmax_idxes; + std::set skip_splits; // skip splits for this file after applying minmax index (if any) + std::shared_ptr storage_settings; +}; + +using HiveFilePtr = std::shared_ptr; +using HiveFiles = std::vector; + +class HiveTextFile : public IHiveFile +{ +public: + HiveTextFile( + const FieldVector & values_, + const String & namenode_url_, + const String & path_, + UInt64 last_modify_time_, + size_t size_, + const NamesAndTypesList & index_names_and_types_, + const std::shared_ptr & hive_settings_, + ContextPtr context_) + : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) + { + } + + virtual FileFormat getFormat() const override { return FileFormat::TEXT; } + virtual String getName() const override { return "TEXT"; } +}; + +class HiveOrcFile : public IHiveFile +{ +public: + HiveOrcFile( + const FieldVector & values_, + const String & namenode_url_, + const String & path_, + UInt64 last_modify_time_, + size_t size_, + const NamesAndTypesList & index_names_and_types_, + const std::shared_ptr & hive_settings_, + ContextPtr context_) + : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) + { + } + + virtual FileFormat getFormat() const override { return FileFormat::ORC; } + virtual String getName() const override { return "ORC"; } + virtual bool hasMinMaxIndex() const override; + virtual void loadMinMaxIndex() override; + + virtual bool hasSubMinMaxIndex() const override; + virtual void loadSubMinMaxIndex() override; + +protected: + virtual std::unique_ptr buildMinMaxIndex(const orc::Statistics * statistics); + virtual Range buildRange(const orc::ColumnStatistics * col_stats); + virtual void prepareReader(); + virtual void prepareColumnMapping(); + + std::shared_ptr reader; + std::map orc_column_positions; +}; + +class HiveParquetFile : public IHiveFile +{ +public: 
+ HiveParquetFile( + const FieldVector & values_, + const String & namenode_url_, + const String & path_, + UInt64 last_modify_time_, + size_t size_, + const NamesAndTypesList & index_names_and_types_, + const std::shared_ptr & hive_settings_, + ContextPtr context_) + : IHiveFile(values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_) + { + } + + virtual FileFormat getFormat() const override { return FileFormat::PARQUET; } + virtual String getName() const override { return "PARQUET"; } + + virtual bool hasSubMinMaxIndex() const override; + virtual void loadSubMinMaxIndex() override; + +protected: + virtual void prepareReader(); + + std::shared_ptr fs; + std::shared_ptr reader; + std::map parquet_column_positions; +}; +} + + +#endif diff --git a/src/Storages/Hive/HiveSettings.cpp b/src/Storages/Hive/HiveSettings.cpp new file mode 100644 index 00000000000..9519ce8b1f5 --- /dev/null +++ b/src/Storages/Hive/HiveSettings.cpp @@ -0,0 +1,64 @@ +#include + +#if USE_HIVE + +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(HiveSettingsTraits, LIST_OF_HIVE_SETTINGS) + +void HiveSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) +{ + if (!config.has(config_elem)) + return; + + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(config_elem, config_keys); + + try + { + for (const String & key : config_keys) + set(key, config.getString(config_elem + "." + key)); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("in MergeTree config"); + throw; + } +} + +void HiveSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} +#endif diff --git a/src/Storages/Hive/HiveSettings.h b/src/Storages/Hive/HiveSettings.h new file mode 100644 index 00000000000..ed430ba97cd --- /dev/null +++ b/src/Storages/Hive/HiveSettings.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +#if USE_HIVE + +#include +#include +#include + +namespace DB +{ +class ASTStorage; + +#define HIVE_RELATED_SETTINGS(M) \ + M(Char, hive_text_field_delimeter, '\x01', "How to split one row of hive data with format text", 0) \ + M(Bool, enable_orc_stripe_minmax_index, false, "Enable using ORC stripe level minmax index.", 0) \ + M(Bool, enable_parquet_rowgroup_minmax_index, false, "Enable using Parquet row-group level minmax index.", 0) \ + M(Bool, enable_orc_file_minmax_index, true, "Enable using ORC file level minmax index.", 0) + +#define LIST_OF_HIVE_SETTINGS(M) \ + HIVE_RELATED_SETTINGS(M) \ + FORMAT_FACTORY_SETTINGS(M) + +DECLARE_SETTINGS_TRAITS(HiveSettingsTraits, LIST_OF_HIVE_SETTINGS) + + +/** Settings for the Hive engine. + * Could be loaded from a CREATE TABLE query (SETTINGS clause). 
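+  * An illustrative example (assuming the engine arguments follow StorageHive's
+  * constructor order -- metastore url, hive database, hive table -- with the
+  * column list elided):
+  *
+  *     CREATE TABLE t (...) ENGINE = Hive('thrift://hivemetastore:9083', 'db', 'table')
+  *     SETTINGS enable_orc_stripe_minmax_index = 1;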
+ */ +class HiveSettings : public BaseSettings +{ +public: + void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); + void loadFromQuery(ASTStorage & storage_def); +}; +} + +#endif diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp new file mode 100644 index 00000000000..af357e13ca7 --- /dev/null +++ b/src/Storages/Hive/StorageHive.cpp @@ -0,0 +1,685 @@ +#include + +#if USE_HIVE + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int NOT_IMPLEMENTED; + extern const int INVALID_PARTITION_VALUE; + extern const int BAD_ARGUMENTS; + extern const int CANNOT_OPEN_FILE; +} + + +static std::string getBaseName(const String & path) +{ + size_t basename_start = path.rfind('/'); + return path.substr(basename_start + 1); +} + +class StorageHiveSource : public SourceWithProgress, WithContext +{ +public: + using FileFormat = StorageHive::FileFormat; + struct SourcesInfo + { + HiveMetastoreClientPtr hive_metastore_client; + std::string database; + std::string table_name; + HiveFiles hive_files; + NamesAndTypesList partition_name_types; + + std::atomic next_uri_to_read = 0; + + bool need_path_column = false; + bool need_file_column = false; + }; + + using SourcesInfoPtr = std::shared_ptr; + + static Block getHeader(Block header, const SourcesInfoPtr & source_info) + { + if (source_info->need_path_column) + header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + if (source_info->need_file_column) + header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + + return header; + } + + static ColumnsDescription getColumnsDescription(Block header, const SourcesInfoPtr & source_info) + { + ColumnsDescription columns_description{header.getNamesAndTypesList()}; + if (source_info->need_path_column) + columns_description.add({"_path", std::make_shared()}); + if (source_info->need_file_column) + columns_description.add({"_file", std::make_shared()}); + return columns_description; + } + + StorageHiveSource( + SourcesInfoPtr source_info_, + String hdfs_namenode_url_, + String format_, + String compression_method_, + Block sample_block_, + ContextPtr context_, + UInt64 max_block_size_, + const Names & text_input_field_names_ = {}) + : SourceWithProgress(getHeader(sample_block_, source_info_)) + , WithContext(context_) + , source_info(std::move(source_info_)) + , hdfs_namenode_url(hdfs_namenode_url_) + , format(std::move(format_)) + , compression_method(compression_method_) + , max_block_size(max_block_size_) + , sample_block(std::move(sample_block_)) + , to_read_block(sample_block) + , columns_description(getColumnsDescription(sample_block, source_info)) + , text_input_field_names(text_input_field_names_) + , format_settings(getFormatSettings(getContext())) + { + /// Initialize to_read_block, which is used to read data from HDFS. 
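+        /// Partition columns are not stored inside the data files themselves, so they
+        /// are erased here and filled back in from Hive metadata after each chunk is read.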
+ to_read_block = sample_block; + for (const auto & name_type : source_info->partition_name_types) + { + to_read_block.erase(name_type.name); + } + + /// Initialize format settings + format_settings.hive_text.input_field_names = text_input_field_names; + } + + String getName() const override { return "Hive"; } + + Chunk generate() override + { + while (true) + { + if (!reader) + { + current_idx = source_info->next_uri_to_read.fetch_add(1); + if (current_idx >= source_info->hive_files.size()) + return {}; + + const auto & curr_file = source_info->hive_files[current_idx]; + current_path = curr_file->getPath(); + + String uri_with_path = hdfs_namenode_url + current_path; + auto compression = chooseCompressionMethod(current_path, compression_method); + std::unique_ptr raw_read_buf; + try + { + raw_read_buf = std::make_unique( + hdfs_namenode_url, current_path, getContext()->getGlobalContext()->getConfigRef()); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::CANNOT_OPEN_FILE) + { + source_info->hive_metastore_client->clearTableMetadata(source_info->database, source_info->table_name); + throw; + } + } + + /// Use local cache for remote storage if enabled. + std::unique_ptr remote_read_buf; + if (ExternalDataSourceCache::instance().isInitialized() && getContext()->getSettingsRef().use_local_cache_for_remote_storage) + { + size_t buff_size = raw_read_buf->internalBuffer().size(); + if (buff_size == 0) + buff_size = DBMS_DEFAULT_BUFFER_SIZE; + remote_read_buf = RemoteReadBuffer::create(getContext(), + std::make_shared("Hive", getNameNodeCluster(hdfs_namenode_url), uri_with_path, curr_file->getSize(), curr_file->getLastModTs()), + std::move(raw_read_buf), buff_size); + } + else + remote_read_buf = std::move(raw_read_buf); + + if (curr_file->getFormat() == FileFormat::TEXT) + read_buf = wrapReadBufferWithCompressionMethod(std::move(remote_read_buf), compression); + else + read_buf = std::move(remote_read_buf); + + auto input_format = FormatFactory::instance().getInputFormat( + format, *read_buf, to_read_block, getContext(), max_block_size, format_settings); + + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns_description, *input_format, getContext()); + }); + } + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); + } + + Block res; + if (reader->pull(res)) + { + Columns columns = res.getColumns(); + UInt64 num_rows = res.rows(); + + /// Enrich with partition columns. + auto types = source_info->partition_name_types.getTypes(); + for (size_t i = 0; i < types.size(); ++i) + { + auto column = types[i]->createColumnConst(num_rows, source_info->hive_files[current_idx]->getPartitionValues()[i]); + auto previous_idx = sample_block.getPositionByName(source_info->partition_name_types.getNames()[i]); + columns.insert(columns.begin() + previous_idx, column->convertToFullColumnIfConst()); + } + + /// Enrich with virtual columns. 
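+                /// _path carries the full path of the file being read and _file its base name;
+                /// both are materialized as constant columns for the current chunk.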
+                if (source_info->need_path_column)
+                {
+                    auto column = DataTypeString().createColumnConst(num_rows, current_path);
+                    columns.push_back(column->convertToFullColumnIfConst());
+                }
+
+                if (source_info->need_file_column)
+                {
+                    size_t last_slash_pos = current_path.find_last_of('/');
+                    auto file_name = current_path.substr(last_slash_pos + 1);
+
+                    auto column = DataTypeString().createColumnConst(num_rows, std::move(file_name));
+                    columns.push_back(column->convertToFullColumnIfConst());
+                }
+                return Chunk(std::move(columns), num_rows);
+            }
+            reader.reset();
+            pipeline.reset();
+            read_buf.reset();
+        }
+    }
+
+private:
+    std::unique_ptr<ReadBuffer> read_buf;
+    std::unique_ptr<QueryPipeline> pipeline;
+    std::unique_ptr<PullingPipelineExecutor> reader;
+    SourcesInfoPtr source_info;
+    String hdfs_namenode_url;
+    String format;
+    String compression_method;
+    UInt64 max_block_size;
+    Block sample_block;
+    Block to_read_block;
+    ColumnsDescription columns_description;
+    const Names & text_input_field_names;
+    FormatSettings format_settings;
+
+    String current_path;
+    size_t current_idx = 0;
+
+    Poco::Logger * log = &Poco::Logger::get("StorageHive");
+};
+
+
+StorageHive::StorageHive(
+    const String & hive_metastore_url_,
+    const String & hive_database_,
+    const String & hive_table_,
+    const StorageID & table_id_,
+    const ColumnsDescription & columns_,
+    const ConstraintsDescription & constraints_,
+    const String & comment_,
+    const ASTPtr & partition_by_ast_,
+    std::unique_ptr<HiveSettings> storage_settings_,
+    ContextPtr context_)
+    : IStorage(table_id_)
+    , WithContext(context_)
+    , hive_metastore_url(hive_metastore_url_)
+    , hive_database(hive_database_)
+    , hive_table(hive_table_)
+    , partition_by_ast(partition_by_ast_)
+    , storage_settings(std::move(storage_settings_))
+{
+    getContext()->getRemoteHostFilter().checkURL(Poco::URI(hive_metastore_url));
+
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(columns_);
+    storage_metadata.setConstraints(constraints_);
+    storage_metadata.setComment(comment_);
+    setInMemoryMetadata(storage_metadata);
+
+    auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext());
+    auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table);
+
+    hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->getTable()->sd.location);
+    table_schema = hive_table_metadata->getTable()->sd.cols;
+
+    FileFormat hdfs_file_format = IHiveFile::toFileFormat(hive_table_metadata->getTable()->sd.inputFormat);
+    switch (hdfs_file_format)
+    {
+        case FileFormat::TEXT:
+        case FileFormat::LZO_TEXT:
+            format_name = "HiveText";
+            break;
+        case FileFormat::RC_FILE:
+            /// TODO to be implemented
+            throw Exception("Unsupported hive format rc_file", ErrorCodes::NOT_IMPLEMENTED);
+        case FileFormat::SEQUENCE_FILE:
+            /// TODO to be implemented
+            throw Exception("Unsupported hive format sequence_file", ErrorCodes::NOT_IMPLEMENTED);
+        case FileFormat::AVRO:
+            format_name = "Avro";
+            break;
+        case FileFormat::PARQUET:
+            format_name = "Parquet";
+            break;
+        case FileFormat::ORC:
+            format_name = "ORC";
+            break;
+    }
+
+    /// For TextInputFormat Hive tables, text_input_field_names must be filled with the column names from table_schema.
+    if (format_name == "HiveText")
+    {
+        size_t i = 0;
+        text_input_field_names.resize(table_schema.size());
+        for (const auto & field : table_schema)
+        {
+            String name{field.name};
+            boost::to_lower(name);
+            text_input_field_names[i++] = std::move(name);
+        }
+    }
+
+    initMinMaxIndexExpression();
+}
+
+void StorageHive::initMinMaxIndexExpression()
+{
+    auto
metadata_snapshot = getInMemoryMetadataPtr(); + ASTPtr partition_key_expr_list = extractKeyExpressionList(partition_by_ast); + if (!partition_key_expr_list->children.empty()) + { + auto syntax_result = TreeRewriter(getContext()).analyze(partition_key_expr_list, metadata_snapshot->getColumns().getAllPhysical()); + partition_key_expr = ExpressionAnalyzer(partition_key_expr_list, syntax_result, getContext()).getActions(false); + + /// Add all columns used in the partition key to the min-max index. + partition_name_types = partition_key_expr->getRequiredColumnsWithTypes(); + partition_names = partition_name_types.getNames(); + partition_types = partition_name_types.getTypes(); + partition_minmax_idx_expr = std::make_shared( + std::make_shared(partition_name_types), ExpressionActionsSettings::fromContext(getContext())); + } + + NamesAndTypesList all_name_types = metadata_snapshot->getColumns().getAllPhysical(); + for (const auto & column : all_name_types) + { + if (!partition_name_types.contains(column.name)) + hivefile_name_types.push_back(column); + } + hivefile_minmax_idx_expr = std::make_shared( + std::make_shared(hivefile_name_types), ExpressionActionsSettings::fromContext(getContext())); +} + +ASTPtr StorageHive::extractKeyExpressionList(const ASTPtr & node) +{ + if (!node) + return std::make_shared(); + + const auto * expr_func = node->as(); + if (expr_func && expr_func->name == "tuple") + { + /// Primary key is specified in tuple, extract its arguments. + return expr_func->arguments->clone(); + } + else + { + /// Primary key consists of one column. + auto res = std::make_shared(); + res->children.push_back(node); + return res; + } +} + + +HiveFilePtr createHiveFile( + const String & format_name, + const FieldVector & fields, + const String & namenode_url, + const String & path, + UInt64 ts, + size_t size, + const NamesAndTypesList & index_names_and_types, + const std::shared_ptr & hive_settings, + ContextPtr context) +{ + HiveFilePtr hive_file; + if (format_name == "HiveText") + { + hive_file = std::make_shared(fields, namenode_url, path, ts, size, index_names_and_types, hive_settings, context); + } + else if (format_name == "ORC") + { + hive_file = std::make_shared(fields, namenode_url, path, ts, size, index_names_and_types, hive_settings, context); + } + else if (format_name == "Parquet") + { + hive_file = std::make_shared(fields, namenode_url, path, ts, size, index_names_and_types, hive_settings, context); + } + else + { + throw Exception("IHiveFile not implemented for format " + format_name, ErrorCodes::NOT_IMPLEMENTED); + } + return hive_file; +} + +std::vector StorageHive::collectHiveFilesFromPartition( + const Apache::Hadoop::Hive::Partition & partition, + SelectQueryInfo & query_info, + HiveTableMetadataPtr hive_table_metadata, + const HDFSFSPtr & fs, + ContextPtr context_) +{ + LOG_DEBUG(log, "Collect hive files from partition {}", boost::join(partition.values, ",")); + + /// Skip partition "__HIVE_DEFAULT_PARTITION__" + bool has_default_partition = false; + for (const auto & value : partition.values) + { + if (value == "__HIVE_DEFAULT_PARTITION__") + { + has_default_partition = true; + break; + } + } + if (has_default_partition) + return {}; + + /// Check partition values + if (partition.values.size() != partition_names.size()) + throw Exception( + fmt::format("Partition value size not match, expect {}, but got {}", partition_names.size(), partition.values.size()), + ErrorCodes::INVALID_PARTITION_VALUE); + + /// Join partition values in CSV format + WriteBufferFromOwnString 
wb; + for (size_t i = 0; i < partition.values.size(); ++i) + { + if (i != 0) + writeString(",", wb); + writeString(partition.values[i], wb); + } + writeString("\n", wb); + + ReadBufferFromString buffer(wb.str()); + auto format = FormatFactory::instance().getInputFormat( + "CSV", buffer, partition_key_expr->getSampleBlock(), getContext(), getContext()->getSettingsRef().max_block_size); + auto pipeline = QueryPipeline(std::move(format)); + auto reader = std::make_unique<PullingPipelineExecutor>(pipeline); + Block block; + if (!reader->pull(block) || !block.rows()) + throw Exception("Could not parse partition value: " + wb.str(), ErrorCodes::INVALID_PARTITION_VALUE); + + std::vector<Range> ranges; + ranges.reserve(partition_names.size()); + FieldVector fields(partition_names.size()); + for (size_t i = 0; i < partition_names.size(); ++i) + { + block.getByPosition(i).column->get(0, fields[i]); + ranges.emplace_back(fields[i]); + } + + const KeyCondition partition_key_condition(query_info, getContext(), partition_names, partition_minmax_idx_expr); + if (!partition_key_condition.checkInHyperrectangle(ranges, partition_types).can_be_true) + return {}; + + auto file_infos = listDirectory(partition.sd.location, hive_table_metadata, fs); + std::vector<HiveFilePtr> hive_files; + hive_files.reserve(file_infos.size()); + for (const auto & file_info : file_infos) + { + auto hive_file = createHiveFileIfNeeded(file_info, fields, query_info, context_); + if (hive_file) + hive_files.push_back(hive_file); + } + return hive_files; +} + +std::vector<StorageHive::FileInfo> +StorageHive::listDirectory(const String & path, HiveTableMetadataPtr hive_table_metadata, const HDFSFSPtr & fs) +{ + return hive_table_metadata->getFilesByLocation(fs, path); +} + +HiveFilePtr StorageHive::createHiveFileIfNeeded( + const FileInfo & file_info, const FieldVector & fields, SelectQueryInfo & query_info, ContextPtr context_) +{ + LOG_TRACE(log, "Append hive file {}", file_info.path); + String filename = getBaseName(file_info.path); + /// Skip temporary files starting with '.'
+ if (filename.find('.') == 0) + return {}; + + auto hive_file = createHiveFile( + format_name, + fields, + hdfs_namenode_url, + file_info.path, + file_info.last_modify_time, + file_info.size, + hivefile_name_types, + storage_settings, + context_); + + /// Load file level minmax index and apply + const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr); + if (hive_file->hasMinMaxIndex()) + { + hive_file->loadMinMaxIndex(); + if (!hivefile_key_condition.checkInHyperrectangle(hive_file->getMinMaxIndex()->hyperrectangle, hivefile_name_types.getTypes()) + .can_be_true) + { + LOG_TRACE(log, "Skip hive file {} by index {}", hive_file->getPath(), hive_file->describeMinMaxIndex(hive_file->getMinMaxIndex())); + return {}; + } + } + + /// Load sub-file level minmax index and apply + if (hive_file->hasSubMinMaxIndex()) + { + std::set<int> skip_splits; + hive_file->loadSubMinMaxIndex(); + const auto & sub_minmax_idxes = hive_file->getSubMinMaxIndexes(); + for (size_t i = 0; i < sub_minmax_idxes.size(); ++i) + { + if (!hivefile_key_condition.checkInHyperrectangle(sub_minmax_idxes[i]->hyperrectangle, hivefile_name_types.getTypes()) + .can_be_true) + { + LOG_TRACE(log, "Skip split {} of hive file {}", i, hive_file->getPath()); + skip_splits.insert(i); + } + } + hive_file->setSkipSplits(skip_splits); + } + return hive_file; +} + +Pipe StorageHive::read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + SelectQueryInfo & query_info, + ContextPtr context_, + QueryProcessingStage::Enum /* processed_stage */, + size_t max_block_size, + unsigned num_streams) +{ + HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, context_->getGlobalContext()->getConfigRef()); + HDFSFSPtr fs = createHDFSFS(builder.get()); + auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext()); + auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table); + + std::vector<Apache::Hadoop::Hive::Partition> partitions = hive_table_metadata->getPartitions(); + /// Hive files to read + HiveFiles hive_files; + /// Mutex to protect hive_files, which may be appended to in multiple threads + std::mutex hive_files_mutex; + + ThreadPool pool{num_streams}; + if (!partitions.empty()) + { + for (const auto & partition : partitions) + { + pool.scheduleOrThrowOnError([&]() + { + auto hive_files_in_partition = collectHiveFilesFromPartition(partition, query_info, hive_table_metadata, fs, context_); + if (!hive_files_in_partition.empty()) + { + std::lock_guard lock(hive_files_mutex); + hive_files.insert(std::end(hive_files), std::begin(hive_files_in_partition), std::end(hive_files_in_partition)); + } + }); + } + pool.wait(); + } + else if (partition_name_types.empty()) /// Partition keys are empty + { + auto file_infos = listDirectory(hive_table_metadata->getTable()->sd.location, hive_table_metadata, fs); + for (const auto & file_info : file_infos) + { + pool.scheduleOrThrowOnError([&] + { + auto hive_file = createHiveFileIfNeeded(file_info, {}, query_info, context_); + if (hive_file) + { + std::lock_guard lock(hive_files_mutex); + hive_files.push_back(hive_file); + } + }); + } + pool.wait(); + } + else /// Partition keys are not empty but partitions are empty + return {}; + + auto sources_info = std::make_shared<StorageHiveSource::SourcesInfo>(); + sources_info->hive_files = std::move(hive_files); + sources_info->database = hive_database; + sources_info->table_name = hive_table; + sources_info->hive_metastore_client =
hive_metastore_client; + sources_info->partition_name_types = partition_name_types; + for (const auto & column : column_names) + { + if (column == "_path") + sources_info->need_path_column = true; + if (column == "_file") + sources_info->need_file_column = true; + } + + if (num_streams > sources_info->hive_files.size()) + num_streams = sources_info->hive_files.size(); + + Pipes pipes; + for (size_t i = 0; i < num_streams; ++i) + { + pipes.emplace_back(std::make_shared<StorageHiveSource>( + sources_info, + hdfs_namenode_url, + format_name, + compression_method, + metadata_snapshot->getSampleBlock(), + context_, + max_block_size, + text_input_field_names)); + } + return Pipe::unitePipes(std::move(pipes)); +} + +SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /* metadata_snapshot*/, ContextPtr /*context*/) +{ + throw Exception("Method write is not implemented for StorageHive", ErrorCodes::NOT_IMPLEMENTED); +} + +NamesAndTypesList StorageHive::getVirtuals() const +{ + return NamesAndTypesList{{"_path", std::make_shared<DataTypeString>()}, {"_file", std::make_shared<DataTypeString>()}}; +} + +void registerStorageHive(StorageFactory & factory) +{ + factory.registerStorage( + "Hive", + [](const StorageFactory::Arguments & args) + { + bool have_settings = args.storage_def->settings; + std::unique_ptr<HiveSettings> hive_settings = std::make_unique<HiveSettings>(); + if (have_settings) + hive_settings->loadFromQuery(*args.storage_def); + + ASTs & engine_args = args.engine_args; + if (engine_args.size() != 3) + throw Exception( + "Storage Hive requires 3 arguments: hive metastore address, hive database and hive table", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + auto * partition_by = args.storage_def->partition_by; + if (!partition_by) + throw Exception("Storage Hive requires partition by clause", ErrorCodes::BAD_ARGUMENTS); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); + + const String & hive_metastore_url = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>(); + const String & hive_database = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>(); + const String & hive_table = engine_args[2]->as<ASTLiteral &>().value.safeGet<String>(); + return StorageHive::create( + hive_metastore_url, + hive_database, + hive_table, + args.table_id, + args.columns, + args.constraints, + args.comment, + partition_by->ptr(), + std::move(hive_settings), + args.getContext()); + }, + StorageFactory::StorageFeatures{ + .supports_settings = true, + .supports_sort_order = true, + }); +} + +} +#endif diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h new file mode 100644 index 00000000000..9629629e057 --- /dev/null +++ b/src/Storages/Hive/StorageHive.h @@ -0,0 +1,122 @@ +#pragma once + +#include + +#if USE_HIVE + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class HiveSettings; +/** + * This class represents the table engine for external HDFS files. + * Read method is supported for now.
+ */ +class StorageHive final : public shared_ptr_helper, public IStorage, WithContext +{ + friend struct shared_ptr_helper; + +public: + String getName() const override { return "Hive"; } + + bool supportsIndexForIn() const override { return true; } + bool mayBenefitFromIndexForIn( + const ASTPtr & /* left_in_operand */, + ContextPtr /* query_context */, + const StorageMetadataPtr & /* metadata_snapshot */) const override + { + return false; + } + + + Pipe read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) override; + + NamesAndTypesList getVirtuals() const override; + +protected: + friend class StorageHiveSource; + StorageHive( + const String & hive_metastore_url_, + const String & hive_database_, + const String & hive_table_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment_, + const ASTPtr & partition_by_ast_, + std::unique_ptr storage_settings_, + ContextPtr context_); + +private: + using FileFormat = IHiveFile::FileFormat; + using FileInfo = HiveMetastoreClient::FileInfo; + using HiveTableMetadataPtr = HiveMetastoreClient::HiveTableMetadataPtr; + + static ASTPtr extractKeyExpressionList(const ASTPtr & node); + + static std::vector listDirectory(const String & path, HiveTableMetadataPtr hive_table_metadata, const HDFSFSPtr & fs); + + void initMinMaxIndexExpression(); + + std::vector collectHiveFilesFromPartition( + const Apache::Hadoop::Hive::Partition & partition, + SelectQueryInfo & query_info, + HiveTableMetadataPtr hive_table_metadata, + const HDFSFSPtr & fs, + ContextPtr context_); + + HiveFilePtr + createHiveFileIfNeeded(const FileInfo & file_info, const FieldVector & fields, SelectQueryInfo & query_info, ContextPtr context_); + + String hive_metastore_url; + + /// Hive database and table + String hive_database; + String hive_table; + + /// Hive table meta + std::vector table_schema; + Names text_input_field_names; /// Defines schema of hive file, only used when text input format is TEXT + + String hdfs_namenode_url; + + String format_name; + String compression_method; + + const ASTPtr partition_by_ast; + NamesAndTypesList partition_name_types; + Names partition_names; + DataTypes partition_types; + ExpressionActionsPtr partition_key_expr; + ExpressionActionsPtr partition_minmax_idx_expr; + + NamesAndTypesList hivefile_name_types; + ExpressionActionsPtr hivefile_minmax_idx_expr; + + std::shared_ptr storage_settings; + + Poco::Logger * log = &Poco::Logger::get("StorageHive"); +}; +} + +#endif diff --git a/src/Storages/Hive/StorageHiveMetadata.cpp b/src/Storages/Hive/StorageHiveMetadata.cpp new file mode 100644 index 00000000000..0aed7c01064 --- /dev/null +++ b/src/Storages/Hive/StorageHiveMetadata.cpp @@ -0,0 +1,54 @@ +#include + +#if USE_HIVE + +#include +#include +#include +#include + +namespace DB +{ + +String StorageHiveMetadata::toString() const +{ + Poco::JSON::Object jobj; + jobj.set("schema", schema); + jobj.set("cluster", cluster); + jobj.set("remote_path", remote_path); + jobj.set("last_modification_timestamp", last_modification_timestamp); + jobj.set("file_size", file_size); + std::stringstream buf; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + jobj.stringify(buf); + return 
buf.str(); + +} + +bool StorageHiveMetadata::fromString(const String &buf) +{ + std::stringstream istream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + istream << buf; + Poco::JSON::Parser parser; + auto jobj = parser.parse(istream).extract<Poco::JSON::Object::Ptr>(); + remote_path = jobj->get("remote_path").convert<String>(); + schema = jobj->get("schema").convert<String>(); + cluster = jobj->get("cluster").convert<String>(); + last_modification_timestamp = jobj->get("last_modification_timestamp").convert<UInt64>(); + file_size = jobj->get("file_size").convert<size_t>(); + return true; +} + +String StorageHiveMetadata::getVersion() const +{ + return std::to_string(last_modification_timestamp); +} + +void registerStorageHiveMetadata(RemoteFileMetadataFactory & factory) +{ + auto creator = []() -> IRemoteFileMetadataPtr { return std::make_shared<StorageHiveMetadata>(); }; + factory.registerRemoteFileMatadata("StorageHiveMetadata", creator); +} + +} +#endif + diff --git a/src/Storages/Hive/StorageHiveMetadata.h b/src/Storages/Hive/StorageHiveMetadata.h new file mode 100644 index 00000000000..d385274588f --- /dev/null +++ b/src/Storages/Hive/StorageHiveMetadata.h @@ -0,0 +1,44 @@ +#pragma once + +#include + +#if USE_HIVE + +#include +namespace DB +{ + +class StorageHiveMetadata : public IRemoteFileMetadata +{ +public: + StorageHiveMetadata() = default; + + explicit StorageHiveMetadata( + const String & schema_, + const String & cluster_, + const String & remote_path_, + size_t file_size_, + UInt64 last_modification_timestamp_) + : schema(schema_), cluster(cluster_) + { + remote_path = remote_path_; + file_size = file_size_; + last_modification_timestamp = last_modification_timestamp_; + } + + ~StorageHiveMetadata() override = default; + + String getName() const override { return "StorageHiveMetadata"; } + String getSchema() const { return schema; } + String getCluster() const { return cluster; } + + String toString() const override; + bool fromString(const String & buf) override; + String getVersion() const override; +private: + String schema; + String cluster; +}; + +} +#endif diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 4139e9599aa..2df32df7f92 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -136,6 +136,8 @@ struct MergeListElement : boost::noncopyable MergeListElement * ptr() { return this; } ~MergeListElement(); + + MergeListElement & ref() { return *this; } }; /** Maintains a list of currently running merges. diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index e53f5adec52..5df50ab9a7c 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -169,6 +169,7 @@ struct SelectQueryInfo bool ignore_projections = false; bool is_projection_query = false; bool merge_tree_empty_result = false; + bool settings_limit_offset_done = false; Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; }; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 19869b77106..6f165dfb4a5 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -126,7 +127,12 @@ namespace /// select query has database, table and table function names as AST pointers /// Creates a copy of query, changes database, table and table function names.
-ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, const std::string & table, ASTPtr table_function_ptr = nullptr) +ASTPtr rewriteSelectQuery( + ContextPtr context, + const ASTPtr & query, + const std::string & remote_database, + const std::string & remote_table, + ASTPtr table_function_ptr = nullptr) { auto modified_query_ast = query->clone(); @@ -140,7 +146,7 @@ ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, co if (table_function_ptr) select_query.addTableFunction(table_function_ptr); else - select_query.replaceDatabaseAndTable(database, table); + select_query.replaceDatabaseAndTable(remote_database, remote_table); /// Restore long column names (cause our short names are ambiguous). /// TODO: aliased table functions & CREATE TABLE AS table function cases @@ -148,12 +154,20 @@ ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, co { RestoreQualifiedNamesVisitor::Data data; data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query->as<ASTSelectQuery &>(), 0)); - data.remote_table.database = database; - data.remote_table.table = table; - data.rename = true; + data.remote_table.database = remote_database; + data.remote_table.table = remote_table; RestoreQualifiedNamesVisitor(data).visit(modified_query_ast); } + /// To make local JOIN work, default database should be added to table names. + /// But only for JOIN section, since the following should work using default_database: + /// - SELECT * FROM d WHERE value IN (SELECT l.value FROM l) ORDER BY value + /// (see 01487_distributed_in_not_default_db) + AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase(), + /* only_replace_current_database_function_= */false, + /* only_replace_in_join_= */true); + visitor.visit(modified_query_ast); + return modified_query_ast; } @@ -601,7 +615,8 @@ void StorageDistributed::read( throw Exception(ErrorCodes::ILLEGAL_FINAL, "Final modifier is not allowed together with parallel reading from replicas feature"); const auto & modified_query_ast = rewriteSelectQuery( - query_info.query, remote_database, remote_table, remote_table_function_ptr); + local_context, query_info.query, + remote_database, remote_table, remote_table_function_ptr); Block header = InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 3d988472b54..fda6333d614 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -687,9 +687,9 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt } else { - if (engine_args.size() < 2 || engine_args.size() > 5) + if (engine_args.empty() || engine_args.size() > 5) throw Exception( - "Storage S3 requires 2 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", + "Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & engine_arg : engine_args) @@ -707,13 +707,16 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt configuration.compression_method = engine_args.back()->as<ASTLiteral &>().value.safeGet<String>(); configuration.format = engine_args[engine_args.size() - 2]->as<ASTLiteral &>().value.safeGet<String>(); } - else + else if (engine_args.size() != 1) { configuration.compression_method = "auto"; configuration.format =
engine_args.back()->as<ASTLiteral &>().value.safeGet<String>(); } } + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true); + return configuration; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 471b460d349..68c21caf4ab 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -624,20 +624,24 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex } else { - if (args.size() != 2 && args.size() != 3) + if (args.empty() || args.size() > 3) throw Exception( - "Storage URL requires 2 or 3 arguments: url, name of used format and optional compression method.", + "Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, local_context); configuration.url = args[0]->as<ASTLiteral &>().value.safeGet<String>(); - configuration.format = args[1]->as<ASTLiteral &>().value.safeGet<String>(); + if (args.size() > 1) + configuration.format = args[1]->as<ASTLiteral &>().value.safeGet<String>(); if (args.size() == 3) configuration.compression_method = args[2]->as<ASTLiteral &>().value.safeGet<String>(); } + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true); + return configuration; } diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index dcf664db6fe..bcf7d7856cf 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -140,7 +140,8 @@ void StorageView::read( current_inner_query = query_info.view_query->clone(); } - InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, {}, column_names); + auto options = SelectQueryOptions(QueryProcessingStage::Complete, 0, false, query_info.settings_limit_offset_done); + InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); interpreter.buildQueryPlan(query_plan); /// It's expected that the columns read from storage are not constant.
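The StorageS3, StorageURL and file-like table function hunks above all repeat one pattern: when the format argument is omitted it defaults to "auto" and is then resolved from the file name via FormatFactory::instance().getFormatFromFileName(url, true). Below is a minimal sketch of that kind of extension lookup; the EXTENSION_TO_FORMAT table and the helper name are illustrative assumptions, not ClickHouse's real registry, which lives in FormatFactory.

    # Sketch of extension-based format inference; the mapping is a hypothetical sample.
    import os
    from urllib.parse import urlparse

    EXTENSION_TO_FORMAT = {
        ".csv": "CSV",
        ".tsv": "TSV",
        ".json": "JSONEachRow",
        ".parquet": "Parquet",
        ".orc": "ORC",
        ".avro": "Avro",
    }

    def format_from_file_name(url: str, throw_if_unknown: bool = True) -> str:
        """Guess an input format from the extension of a URL's path component."""
        path = urlparse(url).path
        ext = os.path.splitext(path)[1].lower()
        fmt = EXTENSION_TO_FORMAT.get(ext)
        if fmt is None and throw_if_unknown:
            raise ValueError(f"Cannot infer format from file name '{path}'")
        return fmt or "auto"

With a resolution step like this in place, a one-argument call such as s3('https://bucket.s3.amazonaws.com/data/events.parquet') can pick "Parquet" without an explicit format argument.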
diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 6df3b2a92ac..d61baf2eb63 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -3,6 +3,7 @@ #include #include "config_core.h" +#include "config_formats.h" namespace DB { @@ -34,6 +35,11 @@ void registerStorageCOS(StorageFactory & factory); #if USE_HDFS void registerStorageHDFS(StorageFactory & factory); + +#if USE_HIVE +void registerStorageHive(StorageFactory & factory); +#endif + #endif void registerStorageODBC(StorageFactory & factory); @@ -106,6 +112,11 @@ void registerStorages() #if USE_HDFS registerStorageHDFS(factory); + + #if USE_HIVE + registerStorageHive(factory); + #endif + #endif registerStorageODBC(factory); diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 4395c318983..5328abd1654 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -53,23 +53,28 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context ASTs & args = args_func.at(0)->children; - if (args.size() < 2) - throw Exception("Table function '" + getName() + "' requires at least 2 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (args.empty()) + throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); filename = args[0]->as<ASTLiteral &>().value.safeGet<String>(); - format = args[1]->as<ASTLiteral &>().value.safeGet<String>(); - if (args.size() == 2) + if (args.size() > 1) + format = args[1]->as<ASTLiteral &>().value.safeGet<String>(); + + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(filename, true); + + if (args.size() <= 2) { checkIfFormatSupportsAutoStructure(getName(), format); return; } if (args.size() != 3 && args.size() != 4) - throw Exception("Table function '" + getName() + "' requires 2, 3 or 4 arguments: filename, format, structure (default auto) and compression method (default auto)", + throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); structure = args[2]->as<ASTLiteral &>().value.safeGet<String>(); diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 2ceafdee229..6e00aac9c37 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -17,7 +17,7 @@ protected: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String filename; - String format; + String format = "auto"; String structure = "auto"; String compression_method = "auto"; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c4be01c6b5c..be217868c15 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -71,6 +71,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con /// Size -> argument indexes static auto size_to_args = std::map<size_t, std::map<String, size_t>> { + {1, {{}}}, {2, {{"format", 1}}}, {3, {{"format", 1}, {"structure", 2}}}, {5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}}, @@ -113,6 +114,9 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con configuration.secret_access_key =
args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>(); } + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true); + s3_configuration = std::move(configuration); } diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 7c4d7b4a444..0a794831d01 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -50,6 +51,8 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co filename = configuration.url; format = configuration.format; + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(filename, true); structure = configuration.structure; compression_method = configuration.compression_method; } diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index f37ea49e387..e21e7d0138d 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -6,80 +6,98 @@ import json import os import sys import time -from github import Github +from typing import List, Optional, Tuple from env_helper import REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH from s3_helper import S3Helper from pr_info import PRInfo -from get_robot_token import get_best_robot_token -from version_helper import get_version_from_repo, update_version_local +from version_helper import ( + ClickHouseVersion, + get_version_from_repo, + update_version_local, +) from ccache_utils import get_ccache_if_not_exists, upload_ccache -from ci_config import CI_CONFIG +from ci_config import CI_CONFIG, BuildConfig from docker_pull_helper import get_image_with_version from tee_popen import TeePopen -def get_build_config(build_check_name, build_name): - if build_check_name == 'ClickHouse build check (actions)': - build_config_name = 'build_config' +def get_build_config(build_check_name: str, build_name: str) -> BuildConfig: + if build_check_name == "ClickHouse build check (actions)": + build_config_name = "build_config" else: raise Exception(f"Unknown build check name {build_check_name}") return CI_CONFIG[build_config_name][build_name] -def _can_export_binaries(build_config): - if build_config['package_type'] != 'deb': +def _can_export_binaries(build_config: BuildConfig) -> bool: + if build_config["package_type"] != "deb": return False - if build_config['bundled'] != "bundled": + if build_config["bundled"] != "bundled": return False - if build_config['splitted'] == 'splitted': + if build_config["splitted"] == "splitted": return False - if build_config['sanitizer'] != '': + if build_config["sanitizer"] != "": return True - if build_config['build_type'] != '': + if build_config["build_type"] != "": return True return False -def get_packager_cmd(build_config, packager_path, output_path, build_version, image_version, ccache_path, pr_info): - package_type = build_config['package_type'] - comp = build_config['compiler'] - cmd = f"cd {packager_path} && ./packager --output-dir={output_path} --package-type={package_type} --compiler={comp}" +def get_packager_cmd( + build_config: BuildConfig, + packager_path: str, + output_path: str, + build_version: str, + image_version: str, + ccache_path: str, + pr_info: PRInfo, +) -> str: + package_type = build_config["package_type"] + comp = build_config["compiler"] + cmd = ( + f"cd {packager_path} && ./packager --output-dir={output_path} " + f"--package-type={package_type} --compiler={comp}" + ) - if
build_config['build_type']: - cmd += ' --build-type={}'.format(build_config['build_type']) - if build_config['sanitizer']: - cmd += ' --sanitizer={}'.format(build_config['sanitizer']) - if build_config['splitted'] == 'splitted': - cmd += ' --split-binary' - if build_config['tidy'] == 'enable': - cmd += ' --clang-tidy' + if build_config["build_type"]: + cmd += " --build-type={}".format(build_config["build_type"]) + if build_config["sanitizer"]: + cmd += " --sanitizer={}".format(build_config["sanitizer"]) + if build_config["splitted"] == "splitted": + cmd += " --split-binary" + if build_config["tidy"] == "enable": + cmd += " --clang-tidy" - cmd += ' --cache=ccache' - cmd += ' --ccache_dir={}'.format(ccache_path) + cmd += " --cache=ccache" + cmd += " --ccache_dir={}".format(ccache_path) - if 'alien_pkgs' in build_config and build_config['alien_pkgs']: - if pr_info.number == 0 or 'release' in pr_info.labels: - cmd += ' --alien-pkgs rpm tgz' + if "alien_pkgs" in build_config and build_config["alien_pkgs"]: + if pr_info.number == 0 or "release" in pr_info.labels: + cmd += " --alien-pkgs rpm tgz" - cmd += ' --docker-image-version={}'.format(image_version) - cmd += ' --version={}'.format(build_version) + cmd += " --docker-image-version={}".format(image_version) + cmd += " --version={}".format(build_version) if _can_export_binaries(build_config): - cmd += ' --with-binaries=tests' + cmd += " --with-binaries=tests" return cmd -def get_image_name(build_config): - if build_config['package_type'] != 'deb': - return 'clickhouse/binary-builder' + +def get_image_name(build_config: BuildConfig) -> str: + if build_config["package_type"] != "deb": + return "clickhouse/binary-builder" else: - return 'clickhouse/deb-builder' + return "clickhouse/deb-builder" -def build_clickhouse(packager_cmd, logs_path, build_output_path): - build_log_path = os.path.join(logs_path, 'build_log.log') +def build_clickhouse( + packager_cmd: str, logs_path: str, build_output_path: str +) -> Tuple[str, bool]: + build_log_path = os.path.join(logs_path, "build_log.log") + success = False with TeePopen(packager_cmd, build_log_path) as process: retcode = process.wait() if os.path.exists(build_output_path): @@ -88,16 +106,21 @@ def build_clickhouse(packager_cmd, logs_path, build_output_path): build_results = [] if retcode == 0: - if len(build_results) != 0: + if len(build_results) > 0: + success = True logging.info("Built successfully") else: - logging.info("Success exit code, but no build artifacts => build failed") + logging.info( + "Success exit code, but no build artifacts => build failed" + ) else: logging.info("Build failed") - return build_log_path, retcode == 0 and len(build_results) > 0 + return build_log_path, success -def get_build_results_if_exists(s3_helper, s3_prefix): +def get_build_results_if_exists( + s3_helper: S3Helper, s3_prefix: str +) -> Optional[List[str]]: try: content = s3_helper.list_prefix(s3_prefix) return content @@ -105,8 +128,19 @@ def get_build_results_if_exists(s3_helper, s3_prefix): logging.info("Got exception %s listing %s", ex, s3_prefix) return None -def create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, elapsed, success): - subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True) + +def create_json_artifact( + temp_path: str, + build_name: str, + log_url: str, + build_urls: List[str], + build_config: BuildConfig, + elapsed: int, + success: bool, +): + subprocess.check_call( + f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", 
shell=True + ) result = { "log_url": log_url, @@ -116,48 +150,76 @@ def create_json_artifact(temp_path, build_name, log_url, build_urls, build_confi "status": success, } - json_name = "build_urls_" + build_name + '.json' + json_name = "build_urls_" + build_name + ".json" - print ("Dump json report", result, "to", json_name, "with env", "build_urls_{build_name}") + print( + "Dump json report", + result, + "to", + json_name, + "with env", + "build_urls_{build_name}", + ) - with open(os.path.join(temp_path, json_name), 'w') as build_links: + with open(os.path.join(temp_path, json_name), "w") as build_links: json.dump(result, build_links) -if __name__ == "__main__": +def get_release_or_pr( + pr_info: PRInfo, build_config: BuildConfig, version: ClickHouseVersion +) -> str: + if "release" in pr_info.labels or "release-lts" in pr_info.labels: + # for release pull requests we use branch name prefixes, not pr numbers + return pr_info.head_ref + elif pr_info.number == 0 and build_config["package_type"] != "performance": + # for pushes to master - major version, but not for performance builds; + # they heavily rely on a fixed path for the build package, and nobody is going + # to deploy them anywhere, so it's ok. + return ".".join(version.as_tuple()[:2]) + # PR number for anything else + return str(pr_info.number) + + +def upload_master_static_binaries( + pr_info: PRInfo, + build_config: BuildConfig, + s3_helper: S3Helper, + build_output_path: str, +): + """Upload binary artifacts to static S3 links""" + static_binary_name = build_config.get("static_binary_name", False) + if pr_info.number != 0: + return + elif not static_binary_name: + return + elif pr_info.base_ref != "master": + return + + s3_path = "/".join((pr_info.base_ref, static_binary_name, "clickhouse")) + binary = os.path.join(build_output_path, "clickhouse") + url = s3_helper.upload_build_file_to_s3(binary, s3_path) + print(f"::notice ::Binary static URL: {url}") + + +def main(): logging.basicConfig(level=logging.INFO) - repo_path = REPO_COPY - temp_path = TEMP_PATH - caches_path = CACHES_PATH build_check_name = sys.argv[1] build_name = sys.argv[2] build_config = get_build_config(build_check_name, build_name) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + if not os.path.exists(TEMP_PATH): + os.makedirs(TEMP_PATH) pr_info = PRInfo() - logging.info("Repo copy path %s", repo_path) + logging.info("Repo copy path %s", REPO_COPY) - gh = Github(get_best_robot_token()) - s3_helper = S3Helper('https://s3.amazonaws.com') + s3_helper = S3Helper("https://s3.amazonaws.com") - version = get_version_from_repo(repo_path) - release_or_pr = None - if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: - # for release pull requests we use branch names prefixes, not pr numbers - release_or_pr = pr_info.head_ref - elif pr_info.number == 0 and build_config['package_type'] != "performance": - # for pushes to master - major version, but not for performance builds - # they havily relies on a fixed path for build package and nobody going - # to deploy them somewhere, so it's ok.
- release_or_pr = ".".join(version.as_tuple()[:2]) - else: - # PR number for anything else - release_or_pr = str(pr_info.number) + version = get_version_from_repo(REPO_COPY) + release_or_pr = get_release_or_pr(pr_info, build_config, version) s3_path_prefix = "/".join((release_or_pr, pr_info.sha, build_name)) @@ -167,14 +229,27 @@ if __name__ == "__main__": if build_results is not None and len(build_results) > 0: logging.info("Some build results found %s", build_results) build_urls = [] - log_url = '' + log_url = "" for url in build_results: - if 'build_log.log' in url: - log_url = 'https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20') + if "build_log.log" in url: + log_url = "https://s3.amazonaws.com/clickhouse-builds/" + url.replace( + "+", "%2B" + ).replace(" ", "%20") else: - build_urls.append('https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20')) - create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, 0, len(build_urls) > 0) - sys.exit(0) + build_urls.append( + "https://s3.amazonaws.com/clickhouse-builds/" + + url.replace("+", "%2B").replace(" ", "%20") + ) + create_json_artifact( + TEMP_PATH, + build_name, + log_url, + build_urls, + build_config, + 0, + len(build_urls) > 0, + ) + return image_name = get_image_name(build_config) docker_image = get_image_with_version(IMAGES_PATH, image_name) @@ -182,65 +257,93 @@ if __name__ == "__main__": logging.info("Got version from repo %s", version.get_version_string()) - version_type = 'testing' - if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: - version_type = 'stable' + version_type = "testing" + if "release" in pr_info.labels or "release-lts" in pr_info.labels: + version_type = "stable" - update_version_local(repo_path, pr_info.sha, version, version_type) + update_version_local(REPO_COPY, pr_info.sha, version, version_type) logging.info("Updated local files with version") logging.info("Build short name %s", build_name) - build_output_path = os.path.join(temp_path, build_name) + build_output_path = os.path.join(TEMP_PATH, build_name) if not os.path.exists(build_output_path): os.makedirs(build_output_path) - ccache_path = os.path.join(caches_path, build_name + '_ccache') + ccache_path = os.path.join(CACHES_PATH, build_name + "_ccache") logging.info("Will try to fetch cache for our build") - get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, temp_path) + get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, TEMP_PATH) if not os.path.exists(ccache_path): logging.info("cache was not fetched, will create empty dir") os.makedirs(ccache_path) - if build_config['package_type'] == "performance" and pr_info.number != 0: + if build_config["package_type"] == "performance" and pr_info.number != 0: # because perf tests store some information about git commits - subprocess.check_call(f"cd {repo_path} && git fetch origin master:master", shell=True) + subprocess.check_call( + f"cd {REPO_COPY} && git fetch origin master:master", shell=True + ) - packager_cmd = get_packager_cmd(build_config, os.path.join(repo_path, "docker/packager"), build_output_path, version.get_version_string(), image_version, ccache_path, pr_info) + packager_cmd = get_packager_cmd( + build_config, + os.path.join(REPO_COPY, "docker/packager"), + build_output_path, + version.get_version_string(), + image_version, + ccache_path, + pr_info, + ) logging.info("Going to run packager with %s", packager_cmd) - build_clickhouse_log = os.path.join(temp_path, 
"build_log") + build_clickhouse_log = os.path.join(TEMP_PATH, "build_log") if not os.path.exists(build_clickhouse_log): os.makedirs(build_clickhouse_log) start = time.time() - log_path, success = build_clickhouse(packager_cmd, build_clickhouse_log, build_output_path) + log_path, success = build_clickhouse( + packager_cmd, build_clickhouse_log, build_output_path + ) elapsed = int(time.time() - start) - subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True) + subprocess.check_call( + f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True + ) subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {ccache_path}", shell=True) logging.info("Build finished with %s, log path %s", success, log_path) - logging.info("Will upload cache") - upload_ccache(ccache_path, s3_helper, pr_info.number, temp_path) + upload_ccache(ccache_path, s3_helper, pr_info.number, TEMP_PATH) if os.path.exists(log_path): - log_url = s3_helper.upload_build_file_to_s3(log_path, s3_path_prefix + "/" + os.path.basename(log_path)) + log_url = s3_helper.upload_build_file_to_s3( + log_path, s3_path_prefix + "/" + os.path.basename(log_path) + ) logging.info("Log url %s", log_url) else: logging.info("Build log doesn't exist") - build_urls = s3_helper.upload_build_folder_to_s3(build_output_path, s3_path_prefix, keep_dirs_in_s3_path=False, upload_symlinks=False) + build_urls = s3_helper.upload_build_folder_to_s3( + build_output_path, + s3_path_prefix, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) logging.info("Got build URLs %s", build_urls) - print("::notice ::Build URLs: {}".format('\n'.join(build_urls))) + print("::notice ::Build URLs: {}".format("\n".join(build_urls))) print("::notice ::Log URL: {}".format(log_url)) - create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, elapsed, success) + create_json_artifact( + TEMP_PATH, build_name, log_url, build_urls, build_config, elapsed, success + ) + + upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) # Fail build job if not successeded if not success: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index d5f8757ffdf..70fdf06d40b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,5 +1,10 @@ #!/usr/bin/env python3 +from typing import Dict, TypeVar + +ConfValue = TypeVar("ConfValue", str, bool) +BuildConfig = Dict[str, ConfValue] + CI_CONFIG = { "build_config": { "package_release": { @@ -99,6 +104,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "amd64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -109,6 +115,7 @@ CI_CONFIG = { "build_type": "debug", "sanitizer": "", "package_type": "binary", + "static_binary_name": "debug-amd64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "enable", @@ -129,6 +136,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "macos", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -139,6 +147,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "aarch64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -149,6 +158,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "freebsd", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -159,6 +169,7 @@ CI_CONFIG = { "build_type": "", 
"sanitizer": "", "package_type": "binary", + "static_binary_name": "macos-aarch64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -198,6 +209,8 @@ CI_CONFIG = { ], }, "tests_config": { + # required_build - build name for artifacts + # force_tests - force success status for tests "Stateful tests (address, actions)": { "required_build": "package_asan", }, @@ -216,6 +229,10 @@ CI_CONFIG = { "Stateful tests (release, actions)": { "required_build": "package_release", }, + "Stateful tests (aarch64, actions)": { + "required_build": "package_aarch64", + "force_tests": True, + }, "Stateful tests (release, DatabaseOrdinary, actions)": { "required_build": "package_release", }, @@ -240,6 +257,10 @@ CI_CONFIG = { "Stateless tests (release, actions)": { "required_build": "package_release", }, + "Stateless tests (aarch64, actions)": { + "required_build": "package_aarch64", + "force_tests": True, + }, "Stateless tests (release, wide parts enabled, actions)": { "required_build": "package_release", }, @@ -334,4 +355,4 @@ CI_CONFIG = { "required_build": "performance", }, }, -} +} # type: dict diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 58fd8c4aece..0d8aee552f5 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -3,7 +3,7 @@ import time import logging import json -import requests +import requests # type: ignore from get_robot_token import get_parameter_from_ssm class ClickHouseHelper: diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 8396303c5a3..dd57f742ff1 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -2,10 +2,17 @@ import time from env_helper import GITHUB_REPOSITORY +from ci_config import CI_CONFIG RETRY = 5 +def override_status(status, check_name): + if CI_CONFIG["tests_config"][check_name].get("force_tests", False): + return "success" + return status + + def get_commit(gh, commit_sha, retry_count=RETRY): for i in range(retry_count): try: @@ -25,7 +32,12 @@ def post_commit_status(gh, sha, check_name, description, state, report_url): for i in range(RETRY): try: commit = get_commit(gh, sha, 1) - commit.create_status(context=check_name, description=description, state=state, target_url=report_url) + commit.create_status( + context=check_name, + description=description, + state=state, + target_url=report_url, + ) break except Exception as ex: if i == RETRY - 1: diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index e389d612f44..d698d18a58b 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 -import subprocess -import logging +import argparse import json +import logging import os -import time import shutil +import subprocess +import time +from typing import List, Tuple + from github import Github from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP @@ -12,40 +15,52 @@ from s3_helper import S3Helper from pr_info import PRInfo from get_robot_token import get_best_robot_token, get_parameter_from_ssm from upload_result_helper import upload_results -from commit_status_helper import get_commit +from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch NAME = "Push to Dockerhub (actions)" -def get_changed_docker_images(pr_info, repo_path, image_file_path): +TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") + + +def 
get_changed_docker_images( + pr_info: PRInfo, repo_path: str, image_file_path: str +) -> List[Tuple[str, str]]: images_dict = {} path_to_images_file = os.path.join(repo_path, image_file_path) if os.path.exists(path_to_images_file): - with open(path_to_images_file, 'r') as dict_file: + with open(path_to_images_file, "r") as dict_file: images_dict = json.load(dict_file) else: - logging.info("Image file %s doesnt exists in repo %s", image_file_path, repo_path) + logging.info( + "Image file %s doesn't exist in repo %s", image_file_path, repo_path + ) - dockerhub_repo_name = 'yandex' if not images_dict: - return [], dockerhub_repo_name + return [] files_changed = pr_info.changed_files - logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed)) + logging.info( + "Changed files for PR %s @ %s: %s", + pr_info.number, + pr_info.sha, + str(files_changed), + ) changed_images = [] for dockerfile_dir, image_description in images_dict.items(): - if image_description['name'].startswith('clickhouse/'): - dockerhub_repo_name = 'clickhouse' - for f in files_changed: if f.startswith(dockerfile_dir): logging.info( - "Found changed file '%s' which affects docker image '%s' with path '%s'", - f, image_description['name'], dockerfile_dir) + "Found changed file '%s' which affects " + "docker image '%s' with path '%s'", + f, + image_description["name"], + dockerfile_dir, + ) changed_images.append(dockerfile_dir) break @@ -54,15 +69,20 @@ def get_changed_docker_images(pr_info, repo_path, image_file_path): index = 0 while index < len(changed_images): image = changed_images[index] - for dependent in images_dict[image]['dependent']: + for dependent in images_dict[image]["dependent"]: logging.info( - "Marking docker image '%s' as changed because it depends on changed docker image '%s'", - dependent, image) + "Marking docker image '%s' as changed because it " + "depends on changed docker image '%s'", + dependent, + image, + ) changed_images.append(dependent) index += 1 - if index > 100: + if index > 5 * len(images_dict): # Sanity check to prevent infinite loop. - raise RuntimeError("Too many changed docker images, this is a bug." + str(changed_images)) + raise RuntimeError( + f"Too many changed docker images, this is a bug.
{changed_images}" + ) # If a dependent image was already in the list because its own files # changed, but then it was added as a dependent of a changed base, we @@ -76,140 +96,248 @@ def get_changed_docker_images(pr_info, repo_path, image_file_path): seen.add(x) no_dups_reversed.append(x) - result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] - logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result) - return result, dockerhub_repo_name + result = [(x, images_dict[x]["name"]) for x in reversed(no_dups_reversed)] + logging.info( + "Changed docker images for PR %s @ %s: '%s'", + pr_info.number, + pr_info.sha, + result, + ) + return result -def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): - logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) - build_log = None - push_log = None - with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: - cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) - retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - build_log = str(pl.name) + +def build_and_push_one_image( + path_to_dockerfile_folder: str, image_name: str, version_string: str, push: bool +) -> Tuple[bool, str]: + path = path_to_dockerfile_folder + logging.info( + "Building docker image %s with version %s from path %s", + image_name, + version_string, + path, + ) + build_log = os.path.join( + TEMP_PATH, + "build_and_push_log_{}_{}".format( + str(image_name).replace("/", "_"), version_string + ), + ) + push_arg = "" + if push: + push_arg = "--push " + + with open(build_log, "w") as bl: + cmd = ( + "docker buildx build --builder default " + f"--build-arg FROM_TAG={version_string} " + f"--build-arg BUILDKIT_INLINE_CACHE=1 " + f"--tag {image_name}:{version_string} " + f"--cache-from type=registry,ref={image_name}:{version_string} " + f"{push_arg}" + f"--progress plain {path}" + ) + logging.info("Docker command to run: %s", cmd) + retcode = subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl).wait() if retcode != 0: - return False, build_log, None - - with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: - cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) - retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - build_log = str(pl.name) - if retcode != 0: - return False, build_log, None - - logging.info("Pushing image %s to dockerhub", image_name) - - with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: - cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) - retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - push_log = str(pl.name) - if retcode != 0: - return False, build_log, push_log + return False, build_log logging.info("Processing of %s successfully finished", image_name) - return True, build_log, push_log + return True, build_log -def process_single_image(versions, path_to_dockerfile_folder, image_name): - logging.info("Image will be pushed with versions %s", ', '.join(versions)) + +def process_single_image( + versions: List[str], path_to_dockerfile_folder: str, image_name: str, push: bool +) -> List[Tuple[str, str, str]]: + logging.info("Image will be pushed with versions %s", ", ".join(versions)) 
result = [] for ver in versions: for i in range(5): - success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver) + success, build_log = build_and_push_one_image( + path_to_dockerfile_folder, image_name, ver, push + ) if success: - result.append((image_name + ":" + ver, build_log, push_log, 'OK')) + result.append((image_name + ":" + ver, build_log, "OK")) break - logging.info("Got error will retry %s time and sleep for %s seconds", i, i * 5) + logging.info( + "Got error, will retry %s time and sleep for %s seconds", i, i * 5 + ) time.sleep(i * 5) else: - result.append((image_name + ":" + ver, build_log, push_log, 'FAIL')) + result.append((image_name + ":" + ver, build_log, "FAIL")) logging.info("Processing finished") return result -def process_test_results(s3_client, test_results, s3_path_prefix): - overall_status = 'success' +def process_test_results( + s3_client: S3Helper, test_results: List[Tuple[str, str, str]], s3_path_prefix: str +) -> Tuple[str, List[Tuple[str, str]]]: + overall_status = "success" processed_test_results = [] - for image, build_log, push_log, status in test_results: - if status != 'OK': - overall_status = 'failure' - url_part = '' + for image, build_log, status in test_results: + if status != "OK": + overall_status = "failure" + url_part = "" if build_log is not None and os.path.exists(build_log): build_url = s3_client.upload_test_report_to_s3( - build_log, - s3_path_prefix + "/" + os.path.basename(build_log)) + build_log, s3_path_prefix + "/" + os.path.basename(build_log) + ) url_part += '<a href="{}">build_log</a>'.format(build_url) - if push_log is not None and os.path.exists(push_log): - push_url = s3_client.upload_test_report_to_s3( - push_log, - s3_path_prefix + "/" + os.path.basename(push_log)) - if url_part: - url_part += ', ' - url_part += '<a href="{}">push_log</a>'.format(push_url) if url_part: - test_name = image + ' (' + url_part + ')' + test_name = image + " (" + url_part + ")" else: test_name = image processed_test_results.append((test_name, status)) return overall_status, processed_test_results -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Program to build changed or given docker images with all " + "dependent images. Example for local running: " + "python docker_images_check.py --no-push-images --no-reports " + "--image-path docker/packager/binary", + ) + + parser.add_argument( + "--suffix", + type=str, + help="suffix for all built images' tags and the resulting json file; the parameter " + "significantly changes the script behavior, e.g. changed_images.json is called " + "changed_images_{suffix}.json and contains a list of all tags", + ) + parser.add_argument( + "--repo", + type=str, + default="clickhouse", + help="docker hub repository prefix", + ) + parser.add_argument( + "--image-path", + type=str, + action="append", + help="list of image paths to build instead of using pr_info + diff URL, " + "e.g.
'docker/packager/binary'", + ) + parser.add_argument( + "--no-reports", + action="store_true", + help="don't push reports to S3 and github", + ) + parser.add_argument( + "--no-push-images", + action="store_true", + help="don't push images to docker hub", + ) + + return parser.parse_args() + + +def main(): + logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() + args = parse_args() + if args.suffix: + global NAME + NAME += f" {args.suffix}" + changed_json = os.path.join(TEMP_PATH, f"changed_images_{args.suffix}.json") + else: + changed_json = os.path.join(TEMP_PATH, "changed_images.json") + + push = not args.no_push_images + if push: + subprocess.check_output( # pylint: disable=unexpected-keyword-arg + "docker login --username 'robotclickhouse' --password-stdin", + input=get_parameter_from_ssm("dockerhub_robot_password"), + encoding="utf-8", + shell=True, + ) + repo_path = GITHUB_WORKSPACE - temp_path = os.path.join(RUNNER_TEMP, 'docker_images_check') - dockerhub_password = get_parameter_from_ssm('dockerhub_robot_password') - if os.path.exists(temp_path): - shutil.rmtree(temp_path) + if os.path.exists(TEMP_PATH): + shutil.rmtree(TEMP_PATH) + os.makedirs(TEMP_PATH) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + if args.image_path: + pr_info = PRInfo() + pr_info.changed_files = set(i for i in args.image_path) + else: + pr_info = PRInfo(need_changed_files=True) - pr_info = PRInfo(need_changed_files=True) - changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") - logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) - pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + changed_images = get_changed_docker_images(pr_info, repo_path, "docker/images.json") + logging.info( + "Has changed images %s", ", ".join([str(image[0]) for image in changed_images]) + ) + pr_commit_version = str(pr_info.number) + "-" + pr_info.sha + # The order is important, PR number is used as cache during the build versions = [str(pr_info.number), pr_commit_version] + result_version = pr_commit_version if pr_info.number == 0: - versions.append("latest") + # First get the latest for cache + versions.insert(0, "latest") - subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + if args.suffix: + # We should build architecture specific images separately and merge a + # manifest lately in a different script + versions = [f"{v}-{args.suffix}" for v in versions] + # changed_images_{suffix}.json should contain all changed images + result_version = versions result_images = {} images_processing_result = [] for rel_path, image_name in changed_images: full_path = os.path.join(repo_path, rel_path) - images_processing_result += process_single_image(versions, full_path, image_name) - result_images[image_name] = pr_commit_version + images_processing_result += process_single_image( + versions, full_path, image_name, push + ) + result_images[image_name] = result_version if changed_images: - description = "Updated " + ','.join([im[1] for im in changed_images]) + description = "Updated " + ",".join([im[1] for im in changed_images]) else: description = "Nothing to update" if len(description) >= 140: description = description[:136] + "..." 
-    s3_helper = S3Helper('https://s3.amazonaws.com')
-
-    s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_')
-    status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix)
-
-    ch_helper = ClickHouseHelper()
-    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)
-
-    with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file:
+    with open(changed_json, "w") as images_file:
         json.dump(result_images, images_file)

-    print("::notice ::Report url: {}".format(url))
-    print("::set-output name=url_output::\"{}\"".format(url))
-    gh = Github(get_best_robot_token())
-    commit = get_commit(gh, pr_info.sha)
-    commit.create_status(context=NAME, description=description, state=status, target_url=url)
+    s3_helper = S3Helper("https://s3.amazonaws.com")

-    prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, status, stopwatch.duration_seconds, stopwatch.start_time_str, url, NAME)
+    s3_path_prefix = (
+        str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(" ", "_")
+    )
+    status, test_results = process_test_results(
+        s3_helper, images_processing_result, s3_path_prefix
+    )
+
+    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)
+
+    print("::notice ::Report url: {}".format(url))
+    print('::set-output name=url_output::"{}"'.format(url))
+
+    if args.no_reports:
+        return
+
+    gh = Github(get_best_robot_token())
+    post_commit_status(gh, pr_info.sha, NAME, description, status, url)
+
+    prepared_events = prepare_tests_results_for_clickhouse(
+        pr_info,
+        test_results,
+        status,
+        stopwatch.duration_seconds,
+        stopwatch.start_time_str,
+        url,
+        NAME,
+    )
+    ch_helper = ClickHouseHelper()
     ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py
new file mode 100644
index 00000000000..c6814b911ff
--- /dev/null
+++ b/tests/ci/docker_manifests_merge.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import logging
+import os
+import subprocess
+
+from typing import List, Dict, Tuple
+from github import Github
+
+from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
+from commit_status_helper import post_commit_status
+from env_helper import RUNNER_TEMP
+from get_robot_token import get_best_robot_token, get_parameter_from_ssm
+from pr_info import PRInfo
+from s3_helper import S3Helper
+from stopwatch import Stopwatch
+from upload_result_helper import upload_results
+
+NAME = "Push multi-arch images to Dockerhub (actions)"
+CHANGED_IMAGES = "changed_images_{}.json"
+Images = Dict[str, List[str]]
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="The program gets images from changed_images_*.json, merges images "
+        "with different architectures into one manifest and pushes them back to docker hub",
+    )
+
+    parser.add_argument(
+        "--suffix",
+        dest="suffixes",
+        type=str,
+        required=True,
+        action="append",
+        help="suffixes for existing images' tags. At least two should be given",
+    )
+    parser.add_argument(
+        "--path",
+        type=str,
+        default=RUNNER_TEMP,
+        help="path to changed_images_*.json files",
+    )
+    parser.add_argument(
+        "--no-reports",
+        action="store_true",
+        help="don't push reports to S3 and github",
+    )
+    parser.add_argument(
+        "--no-push-images",
+        action="store_true",
+        help="don't push images to docker hub",
+    )
+
+    args = parser.parse_args()
+    if len(args.suffixes) < 2:
+        parser.error("at least two --suffix arguments should be given")
+
+    return args
+
+
+def load_images(path: str, suffix: str) -> Images:
+    with open(os.path.join(path, CHANGED_IMAGES.format(suffix)), "r") as images:
+        return json.load(images)
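# Hypothetical per-architecture inputs for this script (image name and tags
# are invented): the files come from docker_images_check.py runs with
# --suffix amd64 and --suffix aarch64 respectively.
amd64 = {"clickhouse/binary-builder": ["123-amd64", "123-abc123-amd64"]}
aarch64 = {"clickhouse/binary-builder": ["123-aarch64", "123-abc123-aarch64"]}
# strip_suffix() below must reduce both to the same suffix-less tag list,
# otherwise check_sources() refuses to merge them:
stripped = {k: [v[: -len("-amd64")] for v in vs] for k, vs in amd64.items()}
assert stripped == {"clickhouse/binary-builder": ["123", "123-abc123"]}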
+
+
+def strip_suffix(suffix: str, images: Images) -> Images:
+    result = {}
+    for image, versions in images.items():
+        for v in versions:
+            if not v.endswith(f"-{suffix}"):
+                raise ValueError(
+                    f"version {image}:{v} does not contain suffix {suffix}"
+                )
+        result[image] = [v[: -len(suffix) - 1] for v in versions]
+
+    return result
+
+
+def check_sources(to_merge: Dict[str, Images]) -> Images:
+    result = {}  # type: Images
+    first_suffix = ""
+    for suffix, images in to_merge.items():
+        if not result:
+            first_suffix = suffix
+            result = strip_suffix(suffix, images)
+            continue
+        if not result == strip_suffix(suffix, images):
+            raise ValueError(
+                f"images in {images} are not equal to {to_merge[first_suffix]}"
+            )
+
+    return result
+
+
+def get_changed_images(images: Images) -> Dict[str, str]:
+    """The original json format is {"image": "tag"}, so the output artifact is
+    produced here. The latest version is {PR_NUMBER}-{SHA1}
+    """
+    return {k: v[-1] for k, v in images.items()}
+
+
+def merge_images(to_merge: Dict[str, Images]) -> Dict[str, List[List[str]]]:
+    """The function merges image-name:version-suffix1 and image-name:version-suffix2
+    into image-name:version"""
+    suffixes = to_merge.keys()
+    result_images = check_sources(to_merge)
+    merge = {}  # type: Dict[str, List[List[str]]]
+
+    for image, versions in result_images.items():
+        merge[image] = []
+        for i, v in enumerate(versions):
+            merged_v = [v]  # type: List[str]
+            for suf in suffixes:
+                merged_v.append(to_merge[suf][image][i])
+            merge[image].append(merged_v)
+
+    return merge
+
+
+def create_manifest(image: str, tags: List[str], push: bool) -> Tuple[str, str]:
+    tag = tags[0]
+    manifest = f"{image}:{tag}"
+    cmd = "docker manifest create --amend {}".format(
+        " ".join((f"{image}:{t}" for t in tags))
+    )
+    logging.info("running: %s", cmd)
+    popen = subprocess.Popen(
+        cmd,
+        shell=True,
+        stderr=subprocess.STDOUT,
+        stdout=subprocess.PIPE,
+        universal_newlines=True,
+    )
+    retcode = popen.wait()
+    if retcode != 0:
+        output = popen.stdout.read()  # type: ignore
+        logging.error("failed to create manifest for %s:\n %s\n", manifest, output)
+        return manifest, "FAIL"
+    if not push:
+        return manifest, "OK"
+
+    cmd = f"docker manifest push {manifest}"
+    logging.info("running: %s", cmd)
+    popen = subprocess.Popen(
+        cmd,
+        shell=True,
+        stderr=subprocess.STDOUT,
+        stdout=subprocess.PIPE,
+        universal_newlines=True,
+    )
+    retcode = popen.wait()
+    if retcode != 0:
+        output = popen.stdout.read()  # type: ignore
+        logging.error("failed to push %s:\n %s\n", manifest, output)
+        return manifest, "FAIL"
+
+    return manifest, "OK"
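# Putting create_manifest() together: for the merged tag "123" with amd64 and
# aarch64 suffixes it effectively runs (image name illustrative):
#
#   docker manifest create --amend clickhouse/binary-builder:123 \
#       clickhouse/binary-builder:123-amd64 clickhouse/binary-builder:123-aarch64
#   docker manifest push clickhouse/binary-builder:123
#
# A dry run that requires a local docker CLI but pushes nothing:
manifest, state = create_manifest(
    "clickhouse/binary-builder", ["123", "123-amd64", "123-aarch64"], False
)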
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+    stopwatch = Stopwatch()
+
+    args = parse_args()
+    push = not args.no_push_images
+    if push:
+        subprocess.check_output(  # pylint: disable=unexpected-keyword-arg
+            "docker login --username 'robotclickhouse' --password-stdin",
+            input=get_parameter_from_ssm("dockerhub_robot_password"),
+            encoding="utf-8",
+            shell=True,
+        )
+
+    to_merge = {}
+    for suf in args.suffixes:
+        to_merge[suf] = load_images(args.path, suf)
+
+    changed_images = get_changed_images(check_sources(to_merge))
+
+    os.environ["DOCKER_CLI_EXPERIMENTAL"] = "enabled"
+    merged = merge_images(to_merge)
+
+    status = "success"
+    test_results = []  # type: List[Tuple[str, str]]
+    for image, versions in merged.items():
+        for tags in versions:
+            manifest, test_result = create_manifest(image, tags, push)
+            test_results.append((manifest, test_result))
+            if test_result != "OK":
+                status = "failure"
+
+    with open(os.path.join(args.path, "changed_images.json"), "w") as ci:
+        json.dump(changed_images, ci)
+
+    pr_info = PRInfo()
+    s3_helper = S3Helper("https://s3.amazonaws.com")
+
+    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)
+
+    print("::notice ::Report url: {}".format(url))
+    print('::set-output name=url_output::"{}"'.format(url))
+
+    if args.no_reports:
+        return
+
+    if changed_images:
+        description = "Updated " + ", ".join(changed_images.keys())
+    else:
+        description = "Nothing to update"
+
+    if len(description) >= 140:
+        description = description[:136] + "..."
+
+    gh = Github(get_best_robot_token())
+    post_commit_status(gh, pr_info.sha, NAME, description, status, url)
+
+    prepared_events = prepare_tests_results_for_clickhouse(
+        pr_info,
+        test_results,
+        status,
+        stopwatch.duration_seconds,
+        stopwatch.start_time_str,
+        url,
+        NAME,
+    )
+    ch_helper = ClickHouseHelper()
+    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py
index 2daa75f9663..23e90aa5b60 100644
--- a/tests/ci/docs_check.py
+++ b/tests/ci/docs_check.py
@@ -57,6 +57,7 @@ if __name__ == "__main__":
     cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"

     run_log_path = os.path.join(test_output, 'runlog.log')
+    logging.info("Running command: '%s'", cmd)

     with TeePopen(cmd, run_log_path) as process:
         retcode = process.wait()
diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py
index 4419ba1c920..7220b86a482 100644
--- a/tests/ci/functional_test_check.py
+++ b/tests/ci/functional_test_check.py
@@ -15,7 +15,7 @@ from pr_info import PRInfo
 from build_download_helper import download_all_deb_packages
 from upload_result_helper import upload_results
 from docker_pull_helper import get_image_with_version
-from commit_status_helper import post_commit_status, get_commit
+from commit_status_helper import post_commit_status, get_commit, override_status
 from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
 from stopwatch import Stopwatch
 from rerun_helper import RerunHelper
@@ -197,7 +197,9 @@ if __name__ == "__main__":
         subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)

     s3_helper = S3Helper('https://s3.amazonaws.com')
+
     state, description, test_results, additional_logs = process_results(result_path, server_log_path)
+    state = override_status(state, check_name)

     ch_helper = ClickHouseHelper()
     mark_flaky_tests(ch_helper, check_name, test_results)
diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py
index db37ee311c5..fae277fe319 100644
--- a/tests/ci/get_robot_token.py
+++ b/tests/ci/get_robot_token.py
@@ -1,6 +1,6 @@ 
#!/usr/bin/env python3 -import boto3 -from github import Github +import boto3 # type: ignore +from github import Github # type: ignore def get_parameter_from_ssm(name, decrypt=True, client=None): if not client: diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py index dff0a7d715e..1b63af30b59 100644 --- a/tests/ci/metrics_lambda/app.py +++ b/tests/ci/metrics_lambda/app.py @@ -1,54 +1,97 @@ #!/usr/bin/env python3 -import requests import argparse -import jwt import sys import json import time from collections import namedtuple + +import jwt +import requests import boto3 +from botocore.exceptions import ClientError + def get_dead_runners_in_ec2(runners): - ids = {runner.name: runner for runner in runners if runner.offline == True and runner.busy == False} + ids = { + runner.name: runner + for runner in runners + # Only `i-deadbead123` are valid names for an instance ID + if runner.offline and not runner.busy and runner.name.startswith("i-") + } + result_to_delete = [ + runner + for runner in runners + if not ids.get(runner.name) and runner.offline and not runner.busy + ] if not ids: return [] - client = boto3.client('ec2') + client = boto3.client("ec2") + + i = 0 + inc = 100 + + print("Checking ids", ids.keys()) + instances_statuses = [] + while i < len(ids.keys()): + try: + instances_statuses.append( + client.describe_instance_status( + InstanceIds=list(ids.keys())[i : i + inc] + ) + ) + i += inc + except ClientError as e: + # The list of non-existent instances is in the message: + # The instance IDs 'i-069b1c256c06cf4e3, i-0f26430432b044035, + # i-0faa2ff44edbc147e, i-0eccf2514585045ec, i-0ee4ee53e0daa7d4a, + # i-07928f15acd473bad, i-0eaddda81298f9a85' do not exist + message = e.response["Error"]["Message"] + if message.startswith("The instance IDs '") and message.endswith( + "' do not exist" + ): + non_existent = message[18:-14].split(", ") + for n in non_existent: + result_to_delete.append(ids.pop(n)) + else: + raise - print("Checking ids", list(ids.keys())) - instances_statuses = client.describe_instance_status(InstanceIds=list(ids.keys())) found_instances = set([]) print("Response", instances_statuses) - for instance_status in instances_statuses['InstanceStatuses']: - if instance_status['InstanceState']['Name'] in ('pending', 'running'): - found_instances.add(instance_status['InstanceId']) + for instances_status in instances_statuses: + for instance_status in instances_status["InstanceStatuses"]: + if instance_status["InstanceState"]["Name"] in ("pending", "running"): + found_instances.add(instance_status["InstanceId"]) print("Found instances", found_instances) - result_to_delete = [] + for runner in result_to_delete: + print("Instance", runner.name, "is not alive, going to remove it") for instance_id, runner in ids.items(): if instance_id not in found_instances: print("Instance", instance_id, "is not alive, going to remove it") result_to_delete.append(runner) return result_to_delete + def get_key_and_app_from_aws(): import boto3 + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( - service_name='secretsmanager', + service_name="secretsmanager", ) - get_secret_value_response = client.get_secret_value( - SecretId=secret_name - ) - data = json.loads(get_secret_value_response['SecretString']) - return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + data = json.loads(get_secret_value_response["SecretString"]) + return 
data["clickhouse-app-key"], int(data["clickhouse-app-id"]) + def handler(event, context): private_key, app_id = get_key_and_app_from_aws() main(private_key, app_id, True, True) + def get_installation_id(jwt_token): headers = { "Authorization": f"Bearer {jwt_token}", @@ -57,54 +100,81 @@ def get_installation_id(jwt_token): response = requests.get("https://api.github.com/app/installations", headers=headers) response.raise_for_status() data = response.json() - return data[0]['id'] + return data[0]["id"] + def get_access_token(jwt_token, installation_id): headers = { "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github.v3+json", } - response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response = requests.post( + f"https://api.github.com/app/installations/{installation_id}/access_tokens", + headers=headers, + ) response.raise_for_status() data = response.json() - return data['token'] + return data["token"] -RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) +RunnerDescription = namedtuple( + "RunnerDescription", ["id", "name", "tags", "offline", "busy"] +) + def list_runners(access_token): headers = { "Authorization": f"token {access_token}", "Accept": "application/vnd.github.v3+json", } - response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", headers=headers) + response = requests.get( + "https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", + headers=headers, + ) response.raise_for_status() data = response.json() - total_runners = data['total_count'] - runners = data['runners'] + total_runners = data["total_count"] + runners = data["runners"] total_pages = int(total_runners / 100 + 1) print("Total pages", total_pages) for i in range(2, total_pages + 1): - response = requests.get(f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100", headers=headers) + response = requests.get( + "https://api.github.com/orgs/ClickHouse/actions/runners" + f"?page={i}&per_page=100", + headers=headers, + ) response.raise_for_status() data = response.json() - runners += data['runners'] + runners += data["runners"] print("Total runners", len(runners)) result = [] for runner in runners: - tags = [tag['name'] for tag in runner['labels']] - desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, - offline=runner['status']=='offline', busy=runner['busy']) + tags = [tag["name"] for tag in runner["labels"]] + desc = RunnerDescription( + id=runner["id"], + name=runner["name"], + tags=tags, + offline=runner["status"] == "offline", + busy=runner["busy"], + ) result.append(desc) return result + def group_runners_by_tag(listed_runners): result = {} - RUNNER_TYPE_LABELS = ['style-checker', 'builder', 'func-tester', 'stress-tester', 'fuzzer-unit-tester'] + RUNNER_TYPE_LABELS = [ + "builder", + "func-tester", + "func-tester-aarch64", + "fuzzer-unit-tester", + "stress-tester", + "style-checker", + ] for runner in listed_runners: for tag in runner.tags: if tag in RUNNER_TYPE_LABELS: @@ -113,57 +183,72 @@ def group_runners_by_tag(listed_runners): result[tag].append(runner) break else: - if 'unlabeled' not in result: - result['unlabeled'] = [] - result['unlabeled'].append(runner) + if "unlabeled" not in result: + result["unlabeled"] = [] + result["unlabeled"].append(runner) return result def push_metrics_to_cloudwatch(listed_runners, namespace): - client = boto3.client('cloudwatch') + client = 
boto3.client('cloudwatch')
+    client = boto3.client("cloudwatch")
     metrics_data = []
-    busy_runners = sum(1 for runner in listed_runners if runner.busy and not runner.offline)
-    metrics_data.append({
-        'MetricName': 'BusyRunners',
-        'Value': busy_runners,
-        'Unit': 'Count',
-    })
+    busy_runners = sum(
+        1 for runner in listed_runners if runner.busy and not runner.offline
+    )
+    metrics_data.append(
+        {
+            "MetricName": "BusyRunners",
+            "Value": busy_runners,
+            "Unit": "Count",
+        }
+    )
     total_active_runners = sum(1 for runner in listed_runners if not runner.offline)
-    metrics_data.append({
-        'MetricName': 'ActiveRunners',
-        'Value': total_active_runners,
-        'Unit': 'Count',
-    })
+    metrics_data.append(
+        {
+            "MetricName": "ActiveRunners",
+            "Value": total_active_runners,
+            "Unit": "Count",
+        }
+    )
     total_runners = len(listed_runners)
-    metrics_data.append({
-        'MetricName': 'TotalRunners',
-        'Value': total_runners,
-        'Unit': 'Count',
-    })
+    metrics_data.append(
+        {
+            "MetricName": "TotalRunners",
+            "Value": total_runners,
+            "Unit": "Count",
+        }
+    )
     if total_active_runners == 0:
         busy_ratio = 100
     else:
         busy_ratio = busy_runners / total_active_runners * 100

-    metrics_data.append({
-        'MetricName': 'BusyRunnersRatio',
-        'Value': busy_ratio,
-        'Unit': 'Percent',
-    })
+    metrics_data.append(
+        {
+            "MetricName": "BusyRunnersRatio",
+            "Value": busy_ratio,
+            "Unit": "Percent",
+        }
+    )

     client.put_metric_data(Namespace=namespace, MetricData=metrics_data)

+
 def delete_runner(access_token, runner):
     headers = {
         "Authorization": f"token {access_token}",
         "Accept": "application/vnd.github.v3+json",
     }
-    response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers)
+    response = requests.delete(
+        f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}",
+        headers=headers,
+    )
     response.raise_for_status()
     print(f"Response code deleting {runner.name} is {response.status_code}")
     return response.status_code == 204

+
 def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_runners):
     payload = {
         "iat": int(time.time()) - 60,
@@ -179,11 +264,11 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_ru
     for group, group_runners in grouped_runners.items():
         if push_to_cloudwatch:
             print(group)
-            push_metrics_to_cloudwatch(group_runners, 'RunnersMetrics/' + group)
+            push_metrics_to_cloudwatch(group_runners, "RunnersMetrics/" + group)
         else:
             print(group, f"({len(group_runners)})")
             for runner in group_runners:
-                print('\t', runner)
+                print("\t", runner)

     if delete_offline_runners:
         print("Going to delete offline runners")
@@ -192,26 +277,46 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_ru
             print("Deleting runner", runner)
             delete_runner(access_token, runner)

+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Get list of runners and their states')
-    parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
-    parser.add_argument('-k', '--private-key', help='Private key')
-    parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
-    parser.add_argument('--push-to-cloudwatch', action='store_true', help='Store received token in parameter store')
-    parser.add_argument('--delete-offline', action='store_true', help='Remove offline runners')
+    parser = argparse.ArgumentParser(description="Get list of runners and their states")
+    parser.add_argument(
+        "-p", "--private-key-path", help="Path to file with private key"
+    )
+    parser.add_argument("-k", "--private-key", help="Private key")
+    parser.add_argument(
+        "-a", "--app-id", type=int, help="GitHub application ID", required=True
+    )
+    parser.add_argument(
+        "--push-to-cloudwatch",
+        action="store_true",
+        help="Push metrics for active and busy runners to cloudwatch",
+    )
+    parser.add_argument(
+        "--delete-offline", action="store_true", help="Remove offline runners"
+    )

     args = parser.parse_args()

     if not args.private_key_path and not args.private_key:
-        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
+        print(
+            "Either --private-key-path or --private-key must be specified",
+            file=sys.stderr,
+        )

     if args.private_key_path and args.private_key:
-        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
+        print(
+            "Both --private-key-path and --private-key must not be specified "
+            "at the same time",
+            file=sys.stderr,
+        )

     if args.private_key:
         private_key = args.private_key
-    else:
-        with open(args.private_key_path, 'r') as key_file:
+    elif args.private_key_path:
+        with open(args.private_key_path, "r") as key_file:
             private_key = key_file.read()
+    else:
+        print("Attempt to get key and id from AWS secret manager")
+        private_key, args.app_id = get_key_and_app_from_aws()

     main(private_key, args.app_id, args.push_to_cloudwatch, args.delete_offline)
diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py
index 48464439dbc..a155786d815 100644
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@@ -2,8 +2,8 @@
 import json
 import os

-import requests
-from unidiff import PatchSet
+import requests  # type: ignore
+from unidiff import PatchSet  # type: ignore

 from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID, GITHUB_EVENT_PATH

@@ -38,9 +38,18 @@ class PRInfo:
             with open(GITHUB_EVENT_PATH, 'r', encoding='utf-8') as event_file:
                 github_event = json.load(event_file)
         else:
-            github_event = {'commits': 1, 'after': 'HEAD', 'ref': None}
+            github_event = {
+                'commits': 1,
+                'before': 'HEAD~',
+                'after': 'HEAD',
+                'ref': None,
+            }
         self.event = github_event
         self.changed_files = set([])
+        self.body = ""
+        ref = github_event.get("ref", "refs/heads/master")
+        if ref.startswith('refs/heads/'):
+            ref = ref[11:]

         # workflow completed event, used for PRs only
         if 'action' in github_event and github_event['action'] == 'completed':
@@ -67,6 +76,7 @@ class PRInfo:
             self.base_name = github_event['pull_request']['base']['repo']['full_name']
             self.head_ref = github_event['pull_request']['head']['ref']
             self.head_name = github_event['pull_request']['head']['repo']['full_name']
+            self.body = github_event['pull_request']['body']

             if labels_from_api:
                 response = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/issues/{self.number}/labels")
@@ -90,13 +100,14 @@ class PRInfo:
             self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}"
             self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
             self.repo_full_name = GITHUB_REPOSITORY
-            if pull_request is None or pull_request['state'] == 'closed': # it's merged PR to master
+            if pull_request is None or pull_request['state'] == 'closed':
+                # it's merged PR to master
                 self.number = 0
                 self.labels = {}
-                self.pr_html_url = f"{repo_prefix}/commits/master"
-                self.base_ref = "master"
+                self.pr_html_url = f"{repo_prefix}/commits/{ref}"
+                self.base_ref = ref
                 self.base_name = self.repo_full_name
-                self.head_ref = "master"
+                self.head_ref = ref
                 self.head_name = self.repo_full_name
                 self.diff_url = \
                     f"https://api.github.com/repos/{GITHUB_REPOSITORY}/compare/{github_event['before']}...{self.sha}"
@@ -126,10 +137,10 
@@ class PRInfo: self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY - self.pr_html_url = f"{repo_prefix}/commits/master" - self.base_ref = "master" + self.pr_html_url = f"{repo_prefix}/commits/{ref}" + self.base_ref = ref self.base_name = self.repo_full_name - self.head_ref = "master" + self.head_ref = ref self.head_name = self.repo_full_name if need_changed_files: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index c7156dbef26..3e93d240256 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import sys import logging +import re from github import Github from env_helper import GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL @@ -8,10 +9,10 @@ from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit -NAME = 'Run Check (actions)' +NAME = "Run Check (actions)" TRUSTED_ORG_IDS = { - 7409213, # yandex + 7409213, # yandex 28471076, # altinity 54801242, # clickhouse } @@ -22,55 +23,59 @@ DO_NOT_TEST_LABEL = "do not test" # Individual trusted contirbutors who are not in any trusted organization. # Can be changed in runtime: we will append users that we learned to be in # a trusted org, to save GitHub API calls. -TRUSTED_CONTRIBUTORS = {e.lower() for e in [ - "achimbab", - "adevyatova ", # DOCSUP - "Algunenano", # Raúl Marín, Tinybird - "AnaUvarova", # DOCSUP - "anauvarova", # technical writer, Yandex - "annvsh", # technical writer, Yandex - "atereh", # DOCSUP - "azat", - "bharatnc", # Newbie, but already with many contributions. - "bobrik", # Seasoned contributor, CloundFlare - "BohuTANG", - "codyrobert", # Flickerbox engineer - "cwurm", # Employee - "damozhaeva", # DOCSUP - "den-crane", - "flickerbox-tom", # Flickerbox - "gyuton", # technical writer, Yandex - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor - "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "ka1bi4", # DOCSUP - "kirillikoff", # DOCSUP - "kitaisreal", # Seasoned contributor - "kreuzerkrieg", - "lehasm", # DOCSUP - "michon470", # DOCSUP - "MyroTk", # Tester in Altinity - "myrrc", # Michael Kot, Altinity - "nikvas0", - "nvartolomei", - "olgarev", # DOCSUP - "otrazhenia", # Yandex docs contractor - "pdv-ru", # DOCSUP - "podshumok", # cmake expert from QRator Labs - "s-mx", # Maxim Sabyanin, former employee, present contributor - "sevirov", # technical writer, Yandex - "spongedu", # Seasoned contributor - "ucasFL", # Amos Bird's friend - "vdimir", # Employee - "vzakaznikov", - "YiuRULE", - "zlobober", # Developer of YT - "ilejn", # Arenadata, responsible for Kerberized Kafka - "thomoco", # ClickHouse - "BoloniniD", # Seasoned contributor, HSE -]} +TRUSTED_CONTRIBUTORS = { + e.lower() + for e in [ + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "amosbird", + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. 
+ "bobrik", # Seasoned contributor, CloudFlare + "BohuTANG", + "codyrobert", # Flickerbox engineer + "cwurm", # Employee + "damozhaeva", # DOCSUP + "den-crane", + "flickerbox-tom", # Flickerbox + "gyuton", # technical writer, Yandex + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kitaisreal", # Seasoned contributor + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober", # Developer of YT + "ilejn", # Arenadata, responsible for Kerberized Kafka + "thomoco", # ClickHouse + "BoloniniD", # Seasoned contributor, HSE + ] +} def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): @@ -82,33 +87,120 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): for org_id in pr_user_orgs: if org_id in TRUSTED_ORG_IDS: - logging.info("Org '%s' is trusted; will mark user %s as trusted", org_id, pr_user_login) + logging.info( + "Org '%s' is trusted; will mark user %s as trusted", + org_id, + pr_user_login, + ) return True logging.info("Org '%s' is not trusted", org_id) return False + # Returns whether we should look into individual checks for this PR. If not, it # can be skipped entirely. def should_run_checks_for_pr(pr_info): # Consider the labels and whether the user is trusted. print("Got labels", pr_info.labels) - force_labels = set(['force tests']).intersection(pr_info.labels) + force_labels = set(["force tests"]).intersection(pr_info.labels) if force_labels: - return True, "Labeled '{}'".format(', '.join(force_labels)) + return True, "Labeled '{}'".format(", ".join(force_labels)) - if 'do not test' in pr_info.labels: + if "do not test" in pr_info.labels: return False, "Labeled 'do not test'" - if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): + if "can be tested" not in pr_info.labels and not pr_is_by_trusted_user( + pr_info.user_login, pr_info.user_orgs + ): return False, "Needs 'can be tested' label" - if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels or 'pr-cherrypick' in pr_info.labels: + if ( + "release" in pr_info.labels + or "pr-backport" in pr_info.labels + or "pr-cherrypick" in pr_info.labels + ): return False, "Don't try new checks for release/backports/cherry-picks" return True, "No special conditions apply" +def check_pr_description(pr_info): + description = pr_info.body + + lines = list( + map(lambda x: x.strip(), description.split("\n") if description else []) + ) + lines = [re.sub(r"\s+", " ", line) for line in lines] + + category = "" + entry = "" + + i = 0 + while i < len(lines): + if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category + # itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + # Should not have more than one category. 
Require empty line + # after the first found category. + if i >= len(lines): + break + if lines[i]: + second_category = re.sub(r"^[-*\s]*", "", lines[i]) + result_status = ( + "More than one changelog category specified: '" + + category + + "', '" + + second_category + + "'" + ) + return result_status[:140] + + elif re.match( + r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + # Don't accept changelog entries like '...'. + entry = re.sub(r"[#>*_.\- ]", "", entry) + else: + i += 1 + + if not category: + return "Changelog category is empty" + + # Filter out the PR categories that are not for changelog. + if re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + return "" + + if not entry: + return f"Changelog entry required for category '{category}'" + + return "" + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) @@ -116,15 +208,40 @@ if __name__ == "__main__": can_run, description = should_run_checks_for_pr(pr_info) gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) + + description_report = check_pr_description(pr_info)[:139] + if description_report: + print("::notice ::Cannot run, description does not match the template") + url = ( + f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" + "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" + ) + commit.create_status( + context=NAME, + description=description_report, + state="failure", + target_url=url, + ) + sys.exit(1) + url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" if not can_run: print("::notice ::Cannot run") - commit.create_status(context=NAME, description=description, state="failure", target_url=url) + commit.create_status( + context=NAME, description=description, state="failure", target_url=url + ) sys.exit(1) else: - if 'pr-documentation' in pr_info.labels or 'pr-doc-fix' in pr_info.labels: - commit.create_status(context=NAME, description="Skipping checks for documentation", state="success", target_url=url) + if "pr-documentation" in pr_info.labels or "pr-doc-fix" in pr_info.labels: + commit.create_status( + context=NAME, + description="Skipping checks for documentation", + state="success", + target_url=url, + ) print("::notice ::Can run, but it's documentation PR, skipping") else: print("::notice ::Can run") - commit.create_status(context=NAME, description=description, state="pending", target_url=url) + commit.create_status( + context=NAME, description=description, state="pending", target_url=url + ) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 753f036a8d7..902b97fdb95 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -7,7 +7,7 @@ import shutil import time from multiprocessing.dummy import Pool -import boto3 +import boto3 # type: ignore from env_helper import S3_TEST_REPORTS_BUCKET, S3_BUILDS_BUCKET, RUNNER_TEMP, CI from compress_files import compress_file_fast diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index e9d5a1bacdb..8490b33dd22 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -14,7 +14,11 @@ from get_robot_token import get_best_robot_token from upload_result_helper import 
upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from clickhouse_helper import ( + ClickHouseHelper, + mark_flaky_tests, + prepare_tests_results_for_clickhouse, +) from stopwatch import Stopwatch from rerun_helper import RerunHelper @@ -25,17 +29,22 @@ def process_result(result_folder): test_results = [] additional_files = [] # Just upload all files from result_folder. - # If task provides processed results, then it's responsible for content of result_folder. + # If task provides processed results, then it's responsible + # for content of result_folder. if os.path.exists(result_folder): - test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + test_files = [ + f + for f in os.listdir(result_folder) + if os.path.isfile(os.path.join(result_folder, f)) + ] additional_files = [os.path.join(result_folder, f) for f in test_files] status = [] status_path = os.path.join(result_folder, "check_status.tsv") if os.path.exists(status_path): logging.info("Found test_results.tsv") - with open(status_path, 'r', encoding='utf-8') as status_file: - status = list(csv.reader(status_file, delimiter='\t')) + with open(status_path, "r", encoding="utf-8") as status_file: + status = list(csv.reader(status_file, delimiter="\t")) if len(status) != 1 or len(status[0]) != 2: logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Invalid check_status.tsv", test_results, additional_files @@ -43,7 +52,7 @@ def process_result(result_folder): try: results_path = os.path.join(result_folder, "test_results.tsv") - test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t')) + test_results = list(csv.reader(open(results_path, "r"), delimiter="\t")) if len(test_results) == 0: raise Exception("Empty results") @@ -60,7 +69,7 @@ if __name__ == "__main__": stopwatch = Stopwatch() repo_path = GITHUB_WORKSPACE - temp_path = os.path.join(RUNNER_TEMP, 'style_check') + temp_path = os.path.join(RUNNER_TEMP, "style_check") pr_info = PRInfo() @@ -74,17 +83,32 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - docker_image = get_image_with_version(temp_path, 'clickhouse/style-test') - s3_helper = S3Helper('https://s3.amazonaws.com') + docker_image = get_image_with_version(temp_path, "clickhouse/style-test") + s3_helper = S3Helper("https://s3.amazonaws.com") - subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) + subprocess.check_output( + f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " + f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " + f"{docker_image}", + shell=True, + ) state, description, test_results, additional_files = process_result(temp_path) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, NAME, test_results) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME) + report_url = upload_results( + s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME + ) print("::notice ::Report url: {}".format(report_url)) post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, 
state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME) + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + test_results, + state, + stopwatch.duration_seconds, + stopwatch.start_time_str, + report_url, + NAME, + ) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index cbb915e6de7..20302dacb97 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -15,11 +15,19 @@ class TeePopen: self.command = command self.log_file = log_file self.env = env + self.process = None def __enter__(self): - # pylint: disable=W0201 - self.process = Popen(self.command, shell=True, universal_newlines=True, env=self.env, stderr=STDOUT, stdout=PIPE, bufsize=1) - self.log_file = open(self.log_file, 'w', encoding='utf-8') + self.process = Popen( + self.command, + shell=True, + universal_newlines=True, + env=self.env, + stderr=STDOUT, + stdout=PIPE, + bufsize=1, + ) + self.log_file = open(self.log_file, "w", encoding="utf-8") return self def __exit__(self, t, value, traceback): diff --git a/tests/ci/worker/ubuntu_ami_for_ci.sh b/tests/ci/worker/ubuntu_ami_for_ci.sh index 3fabbb1f8a4..23d3b18c810 100644 --- a/tests/ci/worker/ubuntu_ami_for_ci.sh +++ b/tests/ci/worker/ubuntu_ami_for_ci.sh @@ -28,6 +28,7 @@ apt-get update apt-get install --yes --no-install-recommends \ apt-transport-https \ + binfmt-support \ build-essential \ ca-certificates \ curl \ @@ -37,6 +38,7 @@ apt-get install --yes --no-install-recommends \ pigz \ python3-dev \ python3-pip \ + qemu-user-static \ unzip curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg @@ -61,6 +63,10 @@ EOT systemctl restart docker +# buildx builder is user-specific +sudo -u ubuntu docker buildx version +sudo -u ubuntu docker buildx create --use --name default-builder + pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory mkdir -p $RUNNER_HOME && cd $RUNNER_HOME diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 396431a2e5f..988feef2f6d 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -9,7 +9,7 @@ import jwt import requests import boto3 -API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse' +API_URL = "https://api.github.com/repos/ClickHouse/ClickHouse" SUSPICIOUS_CHANGED_FILES_NUMBER = 200 @@ -25,76 +25,98 @@ SUSPICIOUS_PATTERNS = [ MAX_RETRY = 5 MAX_WORKFLOW_RERUN = 7 -WorkflowDescription = namedtuple('WorkflowDescription', - ['name', 'action', 'run_id', 'event', 'workflow_id', 'conclusion', 'status', 'api_url', - 'fork_owner_login', 'fork_branch', 'rerun_url', 'jobs_url', 'attempt', 'url']) - -TRUSTED_WORKFLOW_IDS = { - 14586616, # Cancel workflows, always trusted -} +WorkflowDescription = namedtuple( + "WorkflowDescription", + [ + "name", + "action", + "run_id", + "event", + "workflow_id", + "conclusion", + "status", + "api_url", + "fork_owner_login", + "fork_branch", + "rerun_url", + "jobs_url", + "attempt", + "url", + ], +) +# See https://api.github.com/orgs/{name} TRUSTED_ORG_IDS = { - 7409213, # yandex + 7409213, # yandex 28471076, # altinity 54801242, # clickhouse } +# See {API_URL}/actions/workflows +TRUSTED_WORKFLOW_IDS = { + 14586616, # Cancel workflows, always trusted +} + NEED_RERUN_WORKFLOWS = { - 13241696, # PR - 14738810, # DocsRelease - 15834118, # Docs - 15522500, # MasterCI - 15516108, # ReleaseCI 
- 15797242, # BackportPR + 14738810, # DocsRelease + 15834118, # Docs + 15522500, # MasterCI + 15516108, # ReleaseCI + 15797242, # BackportPR + 16441423, # PullRequestCI } # Individual trusted contirbutors who are not in any trusted organization. # Can be changed in runtime: we will append users that we learned to be in # a trusted org, to save GitHub API calls. -TRUSTED_CONTRIBUTORS = {e.lower() for e in [ - "achimbab", - "adevyatova ", # DOCSUP - "Algunenano", # Raúl Marín, Tinybird - "AnaUvarova", # DOCSUP - "anauvarova", # technical writer, Yandex - "annvsh", # technical writer, Yandex - "atereh", # DOCSUP - "azat", - "bharatnc", # Newbie, but already with many contributions. - "bobrik", # Seasoned contributor, CloundFlare - "BohuTANG", - "cwurm", # Employee - "damozhaeva", # DOCSUP - "den-crane", - "gyuton", # DOCSUP - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor - "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "ka1bi4", # DOCSUP - "kirillikoff", # DOCSUP - "kreuzerkrieg", - "lehasm", # DOCSUP - "michon470", # DOCSUP - "MyroTk", # Tester in Altinity - "myrrc", # Michael Kot, Altinity - "nikvas0", - "nvartolomei", - "olgarev", # DOCSUP - "otrazhenia", # Yandex docs contractor - "pdv-ru", # DOCSUP - "podshumok", # cmake expert from QRator Labs - "s-mx", # Maxim Sabyanin, former employee, present contributor - "sevirov", # technical writer, Yandex - "spongedu", # Seasoned contributor - "ucasfl", # Amos Bird's friend - "vdimir", # Employee - "vzakaznikov", - "YiuRULE", - "zlobober", # Developer of YT - "BoloniniD", # Seasoned contributor, HSE -]} +TRUSTED_CONTRIBUTORS = { + e.lower() + for e in [ + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "amosbird", + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. 
+ "bobrik", # Seasoned contributor, CloudFlare + "BohuTANG", + "cwurm", # Employee + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasfl", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober", # Developer of YT + "BoloniniD", # Seasoned contributor, HSE + ] +} def get_installation_id(jwt_token): @@ -105,29 +127,32 @@ def get_installation_id(jwt_token): response = requests.get("https://api.github.com/app/installations", headers=headers) response.raise_for_status() data = response.json() - return data[0]['id'] + return data[0]["id"] + def get_access_token(jwt_token, installation_id): headers = { "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github.v3+json", } - response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response = requests.post( + f"https://api.github.com/app/installations/{installation_id}/access_tokens", + headers=headers, + ) response.raise_for_status() data = response.json() - return data['token'] + return data["token"] + def get_key_and_app_from_aws(): secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( - service_name='secretsmanager', + service_name="secretsmanager", ) - get_secret_value_response = client.get_secret_value( - SecretId=secret_name - ) - data = json.loads(get_secret_value_response['SecretString']) - return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + data = json.loads(get_secret_value_response["SecretString"]) + return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) def is_trusted_contributor(pr_user_login, pr_user_orgs): @@ -139,12 +164,15 @@ def is_trusted_contributor(pr_user_login, pr_user_orgs): for org_id in pr_user_orgs: if org_id in TRUSTED_ORG_IDS: - print(f"Org '{org_id}' is trusted; will mark user {pr_user_login} as trusted") + print( + f"Org '{org_id}' is trusted; will mark user {pr_user_login} as trusted" + ) return True print(f"Org '{org_id}' is not trusted") return False + def _exec_get_with_retry(url): for i in range(MAX_RETRY): try: @@ -157,10 +185,9 @@ def _exec_get_with_retry(url): raise Exception("Cannot execute GET request with retries") + def _exec_post_with_retry(url, token, data=None): - headers = { - "Authorization": f"token {token}" - } + headers = {"Authorization": f"token {token}"} for i in range(MAX_RETRY): try: if data: @@ -169,7 +196,11 @@ def _exec_post_with_retry(url, token, data=None): response = requests.post(url, headers=headers) if response.status_code == 403: data = response.json() - if 'message' in data and data['message'] == 'This workflow run is not waiting for approval': + if ( + "message" in data + and data["message"] + == "This workflow run is not waiting for 
approval" + ): print("Workflow doesn't need approval") return data response.raise_for_status() @@ -180,25 +211,27 @@ def _exec_post_with_retry(url, token, data=None): raise Exception("Cannot execute POST request with retry") + def _get_pull_requests_from(owner, branch): url = f"{API_URL}/pulls?head={owner}:{branch}" return _exec_get_with_retry(url) + def get_workflow_description_from_event(event): - action = event['action'] - run_id = event['workflow_run']['id'] - event_type = event['workflow_run']['event'] - fork_owner = event['workflow_run']['head_repository']['owner']['login'] - fork_branch = event['workflow_run']['head_branch'] - name = event['workflow_run']['name'] - workflow_id = event['workflow_run']['workflow_id'] - conclusion = event['workflow_run']['conclusion'] - attempt = event['workflow_run']['run_attempt'] - status = event['workflow_run']['status'] - jobs_url = event['workflow_run']['jobs_url'] - rerun_url = event['workflow_run']['rerun_url'] - url = event['workflow_run']['html_url'] - api_url = event['workflow_run']['url'] + action = event["action"] + run_id = event["workflow_run"]["id"] + event_type = event["workflow_run"]["event"] + fork_owner = event["workflow_run"]["head_repository"]["owner"]["login"] + fork_branch = event["workflow_run"]["head_branch"] + name = event["workflow_run"]["name"] + workflow_id = event["workflow_run"]["workflow_id"] + conclusion = event["workflow_run"]["conclusion"] + attempt = event["workflow_run"]["run_attempt"] + status = event["workflow_run"]["status"] + jobs_url = event["workflow_run"]["jobs_url"] + rerun_url = event["workflow_run"]["rerun_url"] + url = event["workflow_run"]["html_url"] + api_url = event["workflow_run"]["url"] return WorkflowDescription( name=name, action=action, @@ -213,16 +246,18 @@ def get_workflow_description_from_event(event): jobs_url=jobs_url, rerun_url=rerun_url, url=url, - api_url=api_url + api_url=api_url, ) + def get_pr_author_and_orgs(pull_request): - author = pull_request['user']['login'] - orgs = _exec_get_with_retry(pull_request['user']['organizations_url']) - return author, [org['id'] for org in orgs] + author = pull_request["user"]["login"] + orgs = _exec_get_with_retry(pull_request["user"]["organizations_url"]) + return author, [org["id"] for org in orgs] + def get_changed_files_for_pull_request(pull_request): - number = pull_request['number'] + number = pull_request["number"] changed_files = set([]) for i in range(1, 31): @@ -235,15 +270,19 @@ def get_changed_files_for_pull_request(pull_request): break for change in data: - #print("Adding changed file", change['filename']) - changed_files.add(change['filename']) + # print("Adding changed file", change['filename']) + changed_files.add(change["filename"]) if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: - print(f"More than {len(changed_files)} changed files. Will stop fetching new files.") + print( + f"More than {len(changed_files)} changed files. " + "Will stop fetching new files." 
+            )
             break

     return changed_files

+
 def check_suspicious_changed_files(changed_files):
     if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
         print(f"Too many files changed {len(changed_files)}, need manual approve")
@@ -252,23 +291,29 @@
     for path in changed_files:
         for pattern in SUSPICIOUS_PATTERNS:
             if fnmatch.fnmatch(path, pattern):
-                print(f"File {path} matches suspicious pattern {pattern}, will not approve automatically")
+                print(
+                    f"File {path} matches suspicious pattern {pattern}, "
+                    "will not approve automatically"
+                )
                 return True

     print("No changed files match suspicious patterns, run will be approved")
     return False

+
 def approve_run(run_id, token):
     url = f"{API_URL}/actions/runs/{run_id}/approve"
     _exec_post_with_retry(url, token)

+
 def label_manual_approve(pull_request, token):
-    number = pull_request['number']
+    number = pull_request["number"]
     url = f"{API_URL}/issues/{number}/labels"
-    data = {"labels" : "manual approve"}
+    data = {"labels": "manual approve"}
     _exec_post_with_retry(url, token, data)

+
 def get_token_from_aws():
     private_key, app_id = get_key_and_app_from_aws()
     payload = {
@@ -281,57 +326,78 @@ def get_token_from_aws():
     installation_id = get_installation_id(encoded_jwt)
     return get_access_token(encoded_jwt, installation_id)

+
 def get_workflow_jobs(workflow_description):
-    jobs_url = workflow_description.api_url + f"/attempts/{workflow_description.attempt}/jobs"
+    jobs_url = (
+        workflow_description.api_url + f"/attempts/{workflow_description.attempt}/jobs"
+    )
     jobs = []
     i = 1
     while True:
         got_jobs = _exec_get_with_retry(jobs_url + f"?page={i}")
-        if len(got_jobs['jobs']) == 0:
+        if len(got_jobs["jobs"]) == 0:
             break
-        jobs += got_jobs['jobs']
+        jobs += got_jobs["jobs"]
         i += 1

     return jobs

+
 def check_need_to_rerun(workflow_description):
     if workflow_description.attempt >= MAX_WORKFLOW_RERUN:
-        print("Not going to rerun workflow because it was already retried too many times")
+        print(
+            "Not going to rerun workflow because it was already retried too many times"
+        )
        return False
     print("Going to check jobs")

     jobs = get_workflow_jobs(workflow_description)
     print("Got jobs", len(jobs))
     for job in jobs:
-        if job['conclusion'] not in ('success', 'skipped'):
-            print("Job", job['name'], "failed, checking steps")
-            for step in job['steps']:
+        if job["conclusion"] not in ("success", "skipped"):
+            print("Job", job["name"], "failed, checking steps")
+            for step in job["steps"]:
                 # always the last job
-                if step['name'] == 'Complete job':
-                    print("Found Complete job step for job", job['name'])
+                if step["name"] == "Complete job":
+                    print("Found Complete job step for job", job["name"])
                     break
             else:
-                print("Checked all steps and didn't find Complete job, going to rerun")
+                print(
+                    "Checked all steps and didn't find Complete job, going to rerun"
+                )
                 return True

     return False

+
 def rerun_workflow(workflow_description, token):
     print("Going to rerun workflow")
     _exec_post_with_retry(workflow_description.rerun_url, token)
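# The token exchange in get_token_from_aws() above follows the standard GitHub
# App flow: sign a short-lived JWT with the app's private key, resolve the
# installation, then trade the JWT for an installation access token. A hedged
# sketch; the environment variable names below are placeholders:
import os
import time
import jwt  # pyjwt

app_id = int(os.environ.get("GITHUB_APP_ID", "0"))
private_key = os.environ.get("GITHUB_APP_KEY", "")  # PEM-encoded RSA key
payload = {"iat": int(time.time()) - 60, "exp": int(time.time()) + 600, "iss": app_id}
if private_key:  # jwt.encode needs a real RSA key for RS256
    encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256")
    # get_installation_id(encoded_jwt) and get_access_token(...) defined
    # earlier in this file perform the two REST calls.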
+
 def main(event):
     token = get_token_from_aws()
-    event_data = json.loads(event['body'])
+    event_data = json.loads(event["body"])

     workflow_description = get_workflow_description_from_event(event_data)

     print("Got workflow description", workflow_description)
-    if workflow_description.action == 'completed' and workflow_description.conclusion == 'failure':
-        print("Workflow", workflow_description.url, "completed and failed, let's check for rerun")
+    if (
+        workflow_description.action == "completed"
+        and workflow_description.conclusion == "failure"
+    ):
+        print(
+            "Workflow",
+            workflow_description.url,
+            "completed and failed, let's check for rerun",
+        )

         if workflow_description.workflow_id not in NEED_RERUN_WORKFLOWS:
-            print("Workflow", workflow_description.workflow_id, "not in list of rerunable workflows")
+            print(
+                "Workflow",
+                workflow_description.workflow_id,
+                "not in list of rerunable workflows",
+            )
             return

         if check_need_to_rerun(workflow_description):
@@ -347,7 +413,9 @@ def main(event):
         approve_run(workflow_description.run_id, token)
         return

-    pull_requests = _get_pull_requests_from(workflow_description.fork_owner_login, workflow_description.fork_branch)
+    pull_requests = _get_pull_requests_from(
+        workflow_description.fork_owner_login, workflow_description.fork_branch
+    )
     print("Got pull requests for workflow", len(pull_requests))

     if len(pull_requests) > 1:
@@ -357,9 +425,9 @@ def main(event):
         raise Exception("Cannot find any pull requests for workflow run")

     pull_request = pull_requests[0]
-    print("Pull request for workflow number", pull_request['number'])
+    print("Pull request for workflow number", pull_request["number"])

-    author, author_orgs = get_pr_author_and_orgs(pull_request) 
+    author, author_orgs = get_pr_author_and_orgs(pull_request)
     if is_trusted_contributor(author, author_orgs):
         print("Contributor is trusted, approving run")
         approve_run(workflow_description.run_id, token)
@@ -368,11 +436,14 @@ def main(event):
     changed_files = get_changed_files_for_pull_request(pull_request)
     print(f"Totally have {len(changed_files)} changed files in PR:", changed_files)
     if check_suspicious_changed_files(changed_files):
-        print(f"Pull Request {pull_request['number']} has suspicious changes, label it for manual approve")
+        print(
+            f"Pull Request {pull_request['number']} has suspicious changes, label it for manual approve"
+        )
         label_manual_approve(pull_request, token)
     else:
         print(f"Pull Request {pull_request['number']} has no suspicious changes")
         approve_run(workflow_description.run_id, token)

+
 def handler(event, _):
     main(event)
diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml
index 21163ece190..8cea9044dd0 100644
--- a/tests/config/config.d/keeper_port.xml
+++ b/tests/config/config.d/keeper_port.xml
@@ -5,7 +5,8 @@
         10000
-        30000
+        100000
+        10000
         false
         240000
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index bb36d3452d7..627e3725232 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -271,6 +271,7 @@ class ClickHouseCluster:
         self.with_cassandra = False
         self.with_jdbc_bridge = False
         self.with_nginx = False
+        self.with_hive = False

         self.with_minio = False
         self.minio_dir = os.path.join(self.instances_dir, "minio")
@@ -320,6 +321,8 @@ class ClickHouseCluster:
         # available when with_mongo == True
         self.mongo_host = "mongo1"
         self.mongo_port = get_free_port()
+        self.mongo_no_cred_host = "mongo2"
+        self.mongo_no_cred_port = get_free_port()

         # available when with_cassandra == True
         self.cassandra_host = "cassandra1"
@@ -727,8 +730,8 @@ class ClickHouseCluster:
         env_variables['MONGO_HOST'] = self.mongo_host
         env_variables['MONGO_EXTERNAL_PORT'] = str(self.mongo_port)
         env_variables['MONGO_INTERNAL_PORT'] = "27017"
-        env_variables['MONGO_EXTERNAL_PORT_2'] = "27018"
-        env_variables['MONGO_INTERNAL_PORT_2'] = "27017"
+        env_variables['MONGO_NO_CRED_EXTERNAL_PORT'] = str(self.mongo_no_cred_port)
+        env_variables['MONGO_NO_CRED_INTERNAL_PORT'] = "27017"
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]) self.base_mongo_cmd = ['docker-compose', '--env-file', instance.env_file, '--project-name', self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')] @@ -778,17 +781,24 @@ class ClickHouseCluster: '--file', p.join(docker_compose_yml_dir, 'docker_compose_nginx.yml')] return self.base_nginx_cmd + def setup_hive(self, instance, env_variables, docker_compose_yml_dir): + self.with_hive = True + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_hive.yml')]) + self.base_hive_cmd = ['docker-compose', '--env-file', instance.env_file, '--project-name', self.project_name, + '--file', p.join(docker_compose_yml_dir, 'docker_compose_hive.yml')] + return self.base_hive_cmd + def add_instance(self, name, base_config_dir=None, main_configs=None, user_configs=None, dictionaries=None, macros=None, with_zookeeper=False, with_zookeeper_secure=False, with_mysql_client=False, with_mysql=False, with_mysql8=False, with_mysql_cluster=False, with_kafka=False, with_kerberized_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_postgres_cluster=False, with_hdfs=False, with_kerberized_hdfs=False, with_mongo=False, with_mongo_secure=False, with_nginx=False, - with_redis=False, with_minio=False, with_azurite=False, with_cassandra=False, with_jdbc_bridge=False, + with_redis=False, with_minio=False, with_azurite=False, with_cassandra=False, with_jdbc_bridge=False, with_hive=False, hostname=None, env_variables=None, image="clickhouse/integration-test", tag=None, stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, external_dirs=None, tmpfs=None, zookeeper_docker_compose_path=None, minio_certs_dir=None, use_keeper=True, - main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True, config_root_name="clickhouse") -> 'ClickHouseInstance': + main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True, config_root_name="clickhouse", extra_configs=[]) -> 'ClickHouseInstance': """Add an instance to the cluster. @@ -798,6 +808,7 @@ class ClickHouseCluster: user_configs - a list of config files that will be added to users.d/ directory with_zookeeper - if True, add ZooKeeper configuration to configs and ZooKeeper instances to the cluster. with_zookeeper_secure - if True, add ZooKeeper Secure configuration to configs and ZooKeeper instances to the cluster. 
+ extra_configs - config files that cannot be put into config.d or users.d """ if self.is_up: @@ -843,6 +854,7 @@ class ClickHouseCluster: with_azurite=with_azurite, with_cassandra=with_cassandra, with_jdbc_bridge=with_jdbc_bridge, + with_hive=with_hive, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -863,7 +875,8 @@ class ClickHouseCluster: copy_common_configs=copy_common_configs, external_dirs=external_dirs, tmpfs=tmpfs or [], - config_root_name=config_root_name) + config_root_name=config_root_name, + extra_configs=extra_configs) docker_compose_yml_dir = get_docker_compose_path() @@ -959,6 +972,9 @@ class ClickHouseCluster: if with_jdbc_bridge and not self.with_jdbc_bridge: cmds.append(self.setup_jdbc_bridge_cmd(instance, env_variables, docker_compose_yml_dir)) + if with_hive: + cmds.append(self.setup_hive(instance, env_variables, docker_compose_yml_dir)) + logging.debug("Cluster name:{} project_name:{}. Added instance name:{} tag:{} base_cmd:{} docker_compose_yml_dir:{}".format( self.name, self.project_name, name, tag, self.base_cmd, docker_compose_yml_dir)) return instance @@ -1650,6 +1666,12 @@ class ClickHouseCluster: self.up_called = True time.sleep(10) + if self.with_hive and self.base_hive_cmd: + logging.debug('Setup hive') + subprocess_check_call(self.base_hive_cmd + common_opts) + self.up_called = True + time.sleep(300) + if self.with_minio and self.base_minio_cmd: # Copy minio certificates to minio/certs os.mkdir(self.minio_dir) @@ -1893,13 +1915,13 @@ class ClickHouseInstance: self, cluster, base_path, name, base_config_dir, custom_main_configs, custom_user_configs, custom_dictionaries, macros, with_zookeeper, zookeeper_config_path, with_mysql_client, with_mysql, with_mysql8, with_mysql_cluster, with_kafka, with_kerberized_kafka, - with_rabbitmq, with_nginx, with_kerberized_hdfs, with_mongo, with_redis, with_minio, with_azurite, with_jdbc_bridge, + with_rabbitmq, with_nginx, with_kerberized_hdfs, with_mongo, with_redis, with_minio, with_azurite, with_jdbc_bridge, with_hive, with_cassandra, server_bin_path, odbc_bridge_bin_path, library_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, with_postgres, with_postgres_cluster, clickhouse_start_command=CLICKHOUSE_START_COMMAND, main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True, hostname=None, env_variables=None, image="clickhouse/integration-test", tag="latest", - stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, external_dirs=None, tmpfs=None, config_root_name="clickhouse"): + stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, external_dirs=None, tmpfs=None, config_root_name="clickhouse", extra_configs=[]): self.name = name self.base_cmd = cluster.base_cmd @@ -1913,6 +1935,7 @@ class ClickHouseInstance: self.custom_main_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_main_configs] self.custom_user_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_user_configs] self.custom_dictionaries_paths = [p.abspath(p.join(base_path, c)) for c in custom_dictionaries] + self.custom_extra_config_paths = [p.abspath(p.join(base_path, c)) for c in extra_configs] self.clickhouse_path_dir = p.abspath(p.join(base_path, clickhouse_path_dir)) if clickhouse_path_dir else None self.kerberos_secrets_dir = p.abspath(p.join(base_path, 'secrets')) self.macros = macros if macros is not None else {} @@ -1940,6 +1963,7 @@ class
ClickHouseInstance: self.with_azurite = with_azurite self.with_cassandra = with_cassandra self.with_jdbc_bridge = with_jdbc_bridge + self.with_hive = with_hive self.main_config_name = main_config_name self.users_config_name = users_config_name @@ -2507,6 +2531,8 @@ class ClickHouseInstance: os.mkdir(users_d_dir) dictionaries_dir = p.abspath(p.join(instance_config_dir, 'dictionaries')) os.mkdir(dictionaries_dir) + extra_conf_dir = p.abspath(p.join(instance_config_dir, 'extra_conf.d')) + os.mkdir(extra_conf_dir) def write_embedded_config(name, dest_dir, fix_log_level=False): with open(p.join(HELPERS_DIR, name), 'r') as f: @@ -2553,6 +2579,8 @@ class ClickHouseInstance: # Copy dictionaries configs to configs/dictionaries for path in self.custom_dictionaries_paths: shutil.copy(path, dictionaries_dir) + for path in self.custom_extra_config_paths: + shutil.copy(path, extra_conf_dir) db_dir = p.abspath(p.join(self.path, 'database')) logging.debug(f"Setup database dir {db_dir}") diff --git a/tests/integration/test_cleanup_dir_after_bad_zk_conn/test.py b/tests/integration/test_cleanup_dir_after_bad_zk_conn/test.py index 58a8a609b10..a79015835db 100644 --- a/tests/integration/test_cleanup_dir_after_bad_zk_conn/test.py +++ b/tests/integration/test_cleanup_dir_after_bad_zk_conn/test.py @@ -41,8 +41,7 @@ def test_cleanup_dir_after_bad_zk_conn(start_cluster): pm.drop_instance_zk_connections(node1) time.sleep(3) error = node1.query_and_get_error(query_create) - assert "Poco::Exception. Code: 1000" and \ - "All connection tries failed while connecting to ZooKeeper" in error + time.sleep(3) error = node1.query_and_get_error(query_create) assert "Directory for table data data/replica/test/ already exists" not in error node1.query_with_retry(query_create) diff --git a/tests/integration/test_delayed_replica_failover/test.py b/tests/integration/test_delayed_replica_failover/test.py index 18184c3304d..404848e155b 100644 --- a/tests/integration/test_delayed_replica_failover/test.py +++ b/tests/integration/test_delayed_replica_failover/test.py @@ -79,7 +79,15 @@ SELECT sum(x) FROM distributed WITH TOTALS SETTINGS pm.drop_instance_zk_connections(node_1_2) pm.drop_instance_zk_connections(node_2_2) - time.sleep(4) # allow pings to zookeeper to timeout (must be greater than ZK session timeout). + # allow pings to zookeeper to timeout (must be greater than ZK session timeout). + for _ in range(30): + try: + node_2_2.query("SELECT * FROM system.zookeeper where path = '/'") + time.sleep(0.5) + except: + break + else: + raise Exception("Connection with zookeeper was not lost") # At this point all replicas are stale, but the query must still go to second replicas which are the least stale ones. assert instance_with_dist_table.query(''' @@ -96,14 +104,20 @@ SELECT sum(x) FROM distributed SETTINGS max_replica_delay_for_distributed_queries=1 ''').strip() == '3' - # If we forbid stale replicas, the query must fail. - with pytest.raises(Exception): - print(instance_with_dist_table.query(''' + # If we forbid stale replicas, the query must fail. But sometimes we must have bigger timeouts. 
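The retry loop that follows (like the ZooKeeper-disconnect loop earlier in this test) uses Python's for/else polling idiom: keep probing until the expected failure appears, break when it does, and let the else arm, which runs only if the loop was never broken, raise. A distilled sketch of the idiom with hypothetical names:

```python
import time

def wait_until_raises(probe, attempts=30, delay=0.5):
    # Poll until probe() raises; the break ends the wait successfully.
    # The `else` arm of for/else runs only when no break happened.
    for _ in range(attempts):
        try:
            probe()
            time.sleep(delay)
        except Exception:
            break
    else:
        raise Exception("Expected failure never happened")
```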
+ for _ in range(20): + try: + instance_with_dist_table.query(''' SELECT count() FROM distributed SETTINGS load_balancing='in_order', max_replica_delay_for_distributed_queries=1, fallback_to_stale_replicas_for_distributed_queries=0 -''')) +''') + time.sleep(0.5) + except: + break + else: + raise Exception("Didn't raise when stale replicas are not allowed") # Now partition off the remote replica of the local shard and test that failover still works. pm.partition_instances(node_1_1, node_1_2, port=9000) diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index ce295e11586..53333fe2012 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -282,6 +282,7 @@ def test_postgres_schema(started_cluster): cursor.execute('INSERT INTO test_schema.test_table SELECT i, i FROM generate_series(0, 99) as t(i)') node1.query(''' + DROP DICTIONARY IF EXISTS postgres_dict; CREATE DICTIONARY postgres_dict (id UInt32, value UInt32) PRIMARY KEY id SOURCE(POSTGRESQL( @@ -311,6 +312,7 @@ def test_predefined_connection_configuration(started_cluster): cursor.execute('INSERT INTO test_table SELECT i, i FROM generate_series(0, 99) as t(i)') node1.query(''' + DROP DICTIONARY IF EXISTS postgres_dict; CREATE DICTIONARY postgres_dict (id UInt32, value UInt32) PRIMARY KEY id SOURCE(POSTGRESQL(NAME postgres1)) diff --git a/tests/integration/test_hive_query/__init__.py b/tests/integration/test_hive_query/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_hive_query/configs/config.xml b/tests/integration/test_hive_query/configs/config.xml new file mode 100644 index 00000000000..e89ccdcab6a --- /dev/null +++ b/tests/integration/test_hive_query/configs/config.xml @@ -0,0 +1,32 @@ + + + + + + + localhost + 9000 + + + + + + + true + /tmp/clickhouse_local_cache + 207374182400 + 1048576 + + + + /etc/clickhouse-server/extra_conf.d/hdfs-site.xml + + + + system +
metric_log
+ 1000 + 1000 + + + diff --git a/tests/integration/test_hive_query/configs/hdfs-site.xml b/tests/integration/test_hive_query/configs/hdfs-site.xml new file mode 100644 index 00000000000..82c525ea414 --- /dev/null +++ b/tests/integration/test_hive_query/configs/hdfs-site.xml @@ -0,0 +1,6 @@ + + + dfs.replication + 1 + + diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py new file mode 100644 index 00000000000..a68ae0b066d --- /dev/null +++ b/tests/integration/test_hive_query/test.py @@ -0,0 +1,132 @@ +import logging +import os + +import time +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +logging.getLogger().setLevel(logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler()) + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance('h0_0_0', main_configs=['configs/config.xml'], extra_configs=[ 'configs/hdfs-site.xml'], with_hive=True) + + logging.info("Starting cluster ...") + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_create_parquet_table(started_cluster): + logging.info('Start testing creating hive table ...') + node = started_cluster.instances['h0_0_0'] + node.query("set input_format_parquet_allow_missing_columns = true") + result = node.query(""" + DROP TABLE IF EXISTS default.demo_parquet; + CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) + """) + logging.info("create result {}".format(result)) + time.sleep(120) + assert result.strip() == '' + +def test_create_orc_table(started_cluster): + logging.info('Start testing creating hive table ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + DROP TABLE IF EXISTS default.demo_orc; + CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day) + """) + logging.info("create result {}".format(result)) + + assert result.strip() == '' + +def test_create_text_table(started_cluster): + logging.info('Start testing creating hive table ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + DROP TABLE IF EXISTS default.demo_text; + CREATE TABLE default.demo_text (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_text') PARTITION BY (tuple()) + """) + logging.info("create result {}".format(result)) + + assert result.strip() == '' + +def test_parquet_groupby(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT day, count(*) FROM default.demo_parquet group by day order by day + """) + expected_result = """2021-11-01 1 +2021-11-05 2 +2021-11-11 1 +2021-11-16 2 +""" + assert result == expected_result +def test_orc_groupby(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT day, count(*) FROM default.demo_orc group by day order by day + """) + expected_result = """2021-11-01 1 +2021-11-05 2 +2021-11-11 1 +2021-11-16 2 +""" + assert result == expected_result + +def test_text_count(started_cluster): + node = 
started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT day, count(*) FROM default.demo_orc group by day order by day SETTINGS format_csv_delimiter = '\x01' + """) + expected_result = """2021-11-01 1 +2021-11-05 2 +2021-11-11 1 +2021-11-16 2 +""" + assert result == expected_result + +def test_parquet_groupby_with_cache(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT day, count(*) FROM default.demo_parquet group by day order by day + """) + expected_result = """2021-11-01 1 +2021-11-05 2 +2021-11-11 1 +2021-11-16 2 +""" + assert result == expected_result +def test_cache_read_bytes(started_cluster): + node = started_cluster.instances['h0_0_0'] + node.query("set input_format_parquet_allow_missing_columns = true") + result = node.query(""" + DROP TABLE IF EXISTS default.demo_parquet; + CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) + """) + result = node.query(""" + SELECT day, count(*) FROM default.demo_parquet group by day order by day + """) + result = node.query(""" + SELECT day, count(*) FROM default.demo_parquet group by day order by day + """) + expected_result = """2021-11-01 1 +2021-11-05 2 +2021-11-11 1 +2021-11-16 2 +""" + time.sleep(120) + assert result == expected_result + result = node.query("select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0") + logging.info("Read bytes from cache:{}".format(result)) + assert result.strip() != '0' diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 6ea12342044..1887b18655a 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -286,6 +286,7 @@ def test_cmd_conf(started_cluster): assert result["snapshot_storage_path"] == "/var/lib/clickhouse/coordination/snapshots" assert result["session_timeout_ms"] == "30000" + assert result["min_session_timeout_ms"] == "10000" assert result["operation_timeout_ms"] == "5000" assert result["dead_session_check_period_ms"] == "500" assert result["heart_beat_interval_ms"] == "500" @@ -474,12 +475,17 @@ def test_cmd_crst(started_cluster): print(data) data = send_4lw_cmd(cmd='cons') + print("cons output(after crst) -------------------------------------") + print(data) # 2 connections, 1 for 'cons' command, 1 for zk cons = [n for n in data.split('\n') if len(n) > 0] assert len(cons) == 2 - conn_stat = re.match(r'(.*?)[:].*[(](.*?)[)].*', cons[0].strip(), re.S).group(2) + # connection for zk + zk_conn = [n for n in cons if not n.__contains__('sid=0xffffffffffffffff')][0] + + conn_stat = re.match(r'(.*?)[:].*[(](.*?)[)].*', zk_conn.strip(), re.S).group(2) assert conn_stat is not None result = {} diff --git a/tests/integration/test_keeper_session/__init__.py b/tests/integration/test_keeper_session/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_session/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_session/configs/keeper_config.xml b/tests/integration/test_keeper_session/configs/keeper_config.xml new file mode 100644 index 00000000000..ed0bb52bd51 --- /dev/null +++ 
b/tests/integration/test_keeper_session/configs/keeper_config.xml @@ -0,0 +1,27 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + node1 + 9234 + true + 3 + + + + diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py new file mode 100644 index 00000000000..6867f2bf5cd --- /dev/null +++ b/tests/integration/test_keeper_session/test.py @@ -0,0 +1,156 @@ +import pytest +from helpers.cluster import ClickHouseCluster +import time +import socket +import struct + +from kazoo.client import KazooClient +# from kazoo.protocol.serialization import Connect, read_buffer, write_buffer + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/keeper_config.xml'], stay_alive=True) + +bool_struct = struct.Struct('B') +int_struct = struct.Struct('!i') +int_int_struct = struct.Struct('!ii') +int_int_long_struct = struct.Struct('!iiq') + +int_long_int_long_struct = struct.Struct('!iqiq') +long_struct = struct.Struct('!q') +multiheader_struct = struct.Struct('!iBi') +reply_header_struct = struct.Struct('!iqi') +stat_struct = struct.Struct('!qqqqiiiqiiq') + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def destroy_zk_client(zk): + try: + if zk: + zk.stop() + zk.close() + except: + pass + + +def wait_node(node): + for _ in range(100): + zk = None + try: + zk = get_fake_zk(node.name, timeout=30.0) + print("node", node.name, "ready") + break + except Exception as ex: + time.sleep(0.2) + print("Waiting until", node.name, "will be ready, exception", ex) + finally: + destroy_zk_client(zk) + else: + raise Exception("Can't wait node", node.name, "to become ready") + + +def wait_nodes(): + for n in [node1]: + wait_node(n) + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + _fake_zk_instance.start() + return _fake_zk_instance + + +def get_keeper_socket(node_name): + hosts = cluster.get_instance_ip(node_name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, 9181)) + return client + + +def close_keeper_socket(cli): + if cli is not None: + cli.close() + + +def write_buffer(bytes): + if bytes is None: + return int_struct.pack(-1) + else: + return int_struct.pack(len(bytes)) + bytes + + +def read_buffer(bytes, offset): + length = int_struct.unpack_from(bytes, offset)[0] + offset += int_struct.size + if length < 0: + return None, offset + else: + index = offset + offset += length + return bytes[index:index + length], offset + + +def handshake(node_name=node1.name, session_timeout=1000, session_id=0): + client = None + try: + client = get_keeper_socket(node_name) + protocol_version = 0 + last_zxid_seen = 0 + session_passwd = b'\x00' * 16 + read_only = 0 + + # Handshake serialize and deserialize code is from 'kazoo.protocol.serialization'. 
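One framing detail worth spelling out before the serialization code below: ZooKeeper prefixes every packet with a 4-byte big-endian length, which is why the request gets int_struct.pack(45) prepended. The 45 is simply the handshake body size, recoverable from the struct formats defined at the top of this file:

```python
import struct

fixed = struct.calcsize('!iqiq')       # proto_version, last_zxid_seen, timeout, session_id -> 24 bytes
password = struct.calcsize('!i') + 16  # length-prefixed 16-byte session password -> 20 bytes
read_only = 1                          # trailing boolean flag -> 1 byte
assert fixed + password + read_only == 45
```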
+ + # serialize handshake + req = bytearray() + req.extend(int_long_int_long_struct.pack(protocol_version, last_zxid_seen, session_timeout, session_id)) + req.extend(write_buffer(session_passwd)) + req.extend([1 if read_only else 0]) + # add header + req = int_struct.pack(45) + req + print("handshake request - len:", req.hex(), len(req)) + + # send request + client.send(req) + data = client.recv(1_000) + + # deserialize response + print("handshake response - len:", data.hex(), len(data)) + # ignore header + offset = 4 + proto_version, negotiated_timeout, session_id = int_int_long_struct.unpack_from(data, offset) + offset += int_int_long_struct.size + password, offset = read_buffer(data, offset) + try: + read_only = bool_struct.unpack_from(data, offset)[0] == 1 + offset += bool_struct.size + except struct.error: + read_only = False + + print("negotiated_timeout - session_id", negotiated_timeout, session_id) + return negotiated_timeout, session_id + finally: + if client is not None: + client.close() + + +def test_session_timeout(started_cluster): + wait_nodes() + + negotiated_timeout, _ = handshake(node1.name, session_timeout=1000, session_id=0) + assert negotiated_timeout == 5000 + + negotiated_timeout, _ = handshake(node1.name, session_timeout=8000, session_id=0) + assert negotiated_timeout == 8000 + + negotiated_timeout, _ = handshake(node1.name, session_timeout=20000, session_id=0) + assert negotiated_timeout == 10000 diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index fe1ea373e56..5d3a5ce00d7 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -366,6 +366,13 @@ def testHdfsDirectoryNotExist(started_cluster): node1.query(ddl) assert "Cannot list directory" in node1.query_and_get_error("select * from HDFSStorageWithNotExistDir") +def test_format_detection(started_cluster): + node1.query(f"create table arrow_table (x UInt64) engine=HDFS('hdfs://hdfs1:9000/data.arrow')") + node1.query(f"insert into arrow_table select 1") + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/data.arrow')") + assert(int(result) == 1) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 16358ed4cad..67b5b42b1ec 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -25,7 +25,7 @@ def get_mongo_connection(started_cluster, secure=False, with_credentials=True): if with_credentials: connection_str = 'mongodb://root:clickhouse@localhost:{}'.format(started_cluster.mongo_port) else: - connection_str = 'mongodb://localhost:27018' + connection_str = 'mongodb://localhost:{}'.format(started_cluster.mongo_no_cred_port) if secure: connection_str += '/?tls=true&tlsAllowInvalidCertificates=true' return pymongo.MongoClient(connection_str) diff --git a/tests/integration/test_storage_s3/configs/named_collections.xml b/tests/integration/test_storage_s3/configs/named_collections.xml index efadedc1bde..ef21ced4d0c 100644 --- a/tests/integration/test_storage_s3/configs/named_collections.xml +++ b/tests/integration/test_storage_s3/configs/named_collections.xml @@ -20,5 +20,10 @@ minio minio123 + + http://minio1:9001/root/test.arrow + minio + minio123 + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 885a37f875c..0584ccf79b0 100644 --- 
a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -126,7 +126,7 @@ def run_query(instance, query, stdin=None, settings=None): pytest.param("'wrongid','wrongkey',", False, 'xz', id="xz"), pytest.param("'wrongid','wrongkey',", False, 'zstd', id="zstd") ]) -def _test_put(started_cluster, maybe_auth, positive, compression): +def test_put(started_cluster, maybe_auth, positive, compression): # type: (ClickHouseCluster) -> None bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket @@ -148,7 +148,7 @@ def _test_put(started_cluster, maybe_auth, positive, compression): assert values_csv == get_s3_file_content(started_cluster, bucket, filename) -def _test_partition_by(started_cluster): +def test_partition_by(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" @@ -173,7 +173,7 @@ def _test_partition_by(started_cluster): assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test2_45.csv") -def _test_partition_by_string_column(started_cluster): +def test_partition_by_string_column(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "col_num UInt32, col_str String" @@ -191,7 +191,7 @@ def _test_partition_by_string_column(started_cluster): assert '78,"你好"\n' == get_s3_file_content(started_cluster, bucket, "test_你好.csv") -def _test_partition_by_const_column(started_cluster): +def test_partition_by_const_column(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" @@ -212,7 +212,7 @@ def _test_partition_by_const_column(started_cluster): "space", "plus" ]) -def _test_get_file_with_special(started_cluster, special): +def test_get_file_with_special(started_cluster, special): symbol = {"space": " ", "plus": "+"}[special] urlsafe_symbol = {"space": "%20", "plus": "%2B"}[special] auth = "'minio','minio123'," @@ -239,7 +239,7 @@ def _test_get_file_with_special(started_cluster, special): "plus", "plus2" ]) -def _test_get_path_with_special(started_cluster, special): +def test_get_path_with_special(started_cluster, special): symbol = {"space": "%20", "plus": "%2B", "plus2": "%2B"}[special] safe_symbol = {"space": "%20", "plus": "+", "plus2": "%2B"}[special] auth = "'minio','minio123'," @@ -253,7 +253,7 @@ def _test_get_path_with_special(started_cluster, special): @pytest.mark.parametrize("auth", [ pytest.param("'minio','minio123',", id="minio") ]) -def _test_empty_put(started_cluster, auth): +def test_empty_put(started_cluster, auth): # type: (ClickHouseCluster, str) -> None bucket = started_cluster.minio_bucket @@ -291,7 +291,7 @@ def _test_empty_put(started_cluster, auth): pytest.param("'minio','minio123',", True, id="auth_positive"), pytest.param("'wrongid','wrongkey',", False, id="negative"), ]) -def _test_put_csv(started_cluster, maybe_auth, positive): +def test_put_csv(started_cluster, maybe_auth, positive): # type: (ClickHouseCluster, bool, str) -> None bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket @@ -313,7 +313,7 @@ def _test_put_csv(started_cluster, maybe_auth, positive): # Test put and get with S3 server redirect. 
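The long run of renames in this file (continuing below) is more consequential than it looks: pytest's default collection rule only picks up functions whose names match test_*, so every _test_-prefixed function was silently excluded from the run rather than merely deprioritized, and dropping the underscore re-enables the whole suite. A minimal illustration with hypothetical test names:

```python
# Under pytest's default python_functions = "test_*" setting:
def _test_never_collected():
    raise AssertionError("pytest never runs this")

def test_collected():
    assert 1 + 1 == 2  # collected and executed
```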
-def _test_put_get_with_redirect(started_cluster): +def test_put_get_with_redirect(started_cluster): # type: (ClickHouseCluster) -> None bucket = started_cluster.minio_bucket @@ -340,7 +340,7 @@ def _test_put_get_with_redirect(started_cluster): # Test put with restricted S3 server redirect. -def _test_put_with_zero_redirect(started_cluster): +def test_put_with_zero_redirect(started_cluster): # type: (ClickHouseCluster) -> None bucket = started_cluster.minio_bucket @@ -367,7 +367,7 @@ def _test_put_with_zero_redirect(started_cluster): assert exception_raised -def _test_put_get_with_globs(started_cluster): +def test_put_get_with_globs(started_cluster): # type: (ClickHouseCluster) -> None unique_prefix = random.randint(1,10000) bucket = started_cluster.minio_bucket @@ -399,7 +399,7 @@ def _test_put_get_with_globs(started_cluster): pytest.param("'wrongid','wrongkey'", False, id="negative"), # ("'minio','minio123',",True), Redirect with credentials not working with nginx. ]) -def _test_multipart_put(started_cluster, maybe_auth, positive): +def test_multipart_put(started_cluster, maybe_auth, positive): # type: (ClickHouseCluster) -> None bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket @@ -439,7 +439,7 @@ def _test_multipart_put(started_cluster, maybe_auth, positive): assert csv_data == get_s3_file_content(started_cluster, bucket, filename) -def _test_remote_host_filter(started_cluster): +def test_remote_host_filter(started_cluster): instance = started_cluster.instances["restricted_dummy"] format = "column1 UInt32, column2 UInt32, column3 UInt32" @@ -453,20 +453,21 @@ def _test_remote_host_filter(started_cluster): assert "not allowed in configuration file" in instance.query_and_get_error(query) -@pytest.mark.parametrize("s3_storage_args", [ - pytest.param("''", id="1_argument"), - pytest.param("'','','','','',''", id="6_arguments"), -]) -def _test_wrong_s3_syntax(started_cluster, s3_storage_args): +def test_wrong_s3_syntax(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance expected_err_msg = "Code: 42" # NUMBER_OF_ARGUMENTS_DOESNT_MATCH - query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3({})".format(s3_storage_args) + query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('', '', '', '', '', '')" + assert expected_err_msg in instance.query_and_get_error(query) + + expected_err_msg = "Code: 36" # BAD_ARGUMENTS + + query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('')" assert expected_err_msg in instance.query_and_get_error(query) # https://en.wikipedia.org/wiki/One_Thousand_and_One_Nights -def _test_s3_glob_scheherazade(started_cluster): +def test_s3_glob_scheherazade(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" @@ -535,7 +536,7 @@ def replace_config(old, new): config.close() -def _test_custom_auth_headers(started_cluster): +def test_custom_auth_headers(started_cluster): table_format = "column1 UInt32, column2 UInt32, column3 UInt32" filename = "test.csv" get_query = "select * from s3('http://resolver:8080/{bucket}/{file}', 'CSV', '{table_format}')".format( @@ -566,7 +567,7 @@ def _test_custom_auth_headers(started_cluster): instance.query("DROP TABLE test") -def _test_custom_auth_headers_exclusion(started_cluster): +def test_custom_auth_headers_exclusion(started_cluster): table_format = "column1 UInt32, column2 
UInt32, column3 UInt32" filename = "test.csv" get_query = f"SELECT * FROM s3('http://resolver:8080/{started_cluster.minio_restricted_bucket}/restricteddirectory/{filename}', 'CSV', '{table_format}')" @@ -580,7 +581,7 @@ def _test_custom_auth_headers_exclusion(started_cluster): assert 'Forbidden Error' in ei.value.stderr -def _test_infinite_redirect(started_cluster): +def test_infinite_redirect(started_cluster): bucket = "redirected" table_format = "column1 UInt32, column2 UInt32, column3 UInt32" filename = "test.csv" @@ -598,7 +599,7 @@ def _test_infinite_redirect(started_cluster): pytest.param("bin", "gzip", id="bin"), pytest.param("gz", "auto", id="gz"), ]) -def _test_storage_s3_get_gzip(started_cluster, extension, method): +def test_storage_s3_get_gzip(started_cluster, extension, method): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] filename = f"test_get_gzip.{extension}" @@ -638,7 +639,7 @@ def _test_storage_s3_get_gzip(started_cluster, extension, method): run_query(instance, f"DROP TABLE {name}") -def _test_storage_s3_get_unstable(started_cluster): +def test_storage_s3_get_unstable(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] table_format = "column1 Int64, column2 Int64, column3 Int64, column4 Int64" @@ -647,7 +648,7 @@ def _test_storage_s3_get_unstable(started_cluster): assert result.splitlines() == ["500001,500000,0"] -def _test_storage_s3_put_uncompressed(started_cluster): +def test_storage_s3_put_uncompressed(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] filename = "test_put_uncompressed.bin" @@ -684,7 +685,7 @@ def _test_storage_s3_put_uncompressed(started_cluster): pytest.param("bin", "gzip", id="bin"), pytest.param("gz", "auto", id="gz") ]) -def _test_storage_s3_put_gzip(started_cluster, extension, method): +def test_storage_s3_put_gzip(started_cluster, extension, method): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] filename = f"test_put_gzip.{extension}" @@ -721,7 +722,7 @@ def _test_storage_s3_put_gzip(started_cluster, extension, method): assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708 -def _test_truncate_table(started_cluster): +def test_truncate_table(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance name = "truncate" @@ -745,7 +746,7 @@ def _test_truncate_table(started_cluster): assert instance.query("SELECT * FROM {}".format(name)) == "" -def _test_predefined_connection_configuration(started_cluster): +def test_predefined_connection_configuration(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance name = "test_table" @@ -762,7 +763,7 @@ def _test_predefined_connection_configuration(started_cluster): result = "" -def _test_url_reconnect_in_the_middle(started_cluster): +def test_url_reconnect_in_the_middle(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] table_format = "id String, data String" @@ -799,7 +800,7 @@ def _test_url_reconnect_in_the_middle(started_cluster): assert(int(result) == 3914219105369203805) -def _test_seekable_formats(started_cluster): +def test_seekable_formats(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance @@ -821,7 +822,7 @@ def 
_test_seekable_formats(started_cluster): assert(int(result[:3]) < 200) -def _test_seekable_formats_url(started_cluster): +def test_seekable_formats_url(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] @@ -915,3 +916,16 @@ def test_empty_file(started_cluster): result = instance.query(f"SELECT count() FROM {table_function}") assert(int(result) == 0) + +def test_format_detection(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query(f"create table arrow_table_s3 (x UInt64) engine=S3(s3_arrow)") + instance.query(f"insert into arrow_table_s3 select 1") + result = instance.query(f"select * from s3(s3_arrow)") + assert(int(result) == 1) + + result = instance.query(f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") + assert(int(result) == 1) + diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 3ab0269b42e..c0afa8cd555 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -68,3 +68,26 @@ def test_system_logs_recreate(): # IOW that the table created only when the structure is indeed different. for table in system_logs: assert len(node.query(f"SHOW TABLES FROM system LIKE '{table}%'").strip().split('\n')) == 3 + + +def test_drop_system_log(): + node.exec_in_container(['bash', '-c', f"""echo " + + + 1000000 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """]) + node.restart_clickhouse() + node.query("select 1") + node.query("system flush logs") + node.query("select 2") + node.query("system flush logs") + assert node.query("select count() > 0 from system.query_log") == "1\n" + node.query("drop table system.query_log sync") + node.query("select 3") + node.query("system flush logs") + assert node.query("select count() > 0 from system.query_log") == "1\n" + node.exec_in_container(['rm', f'/etc/clickhouse-server/config.d/yyy-override-query_log.xml']) + node.restart_clickhouse() diff --git a/tests/queries/0_stateless/00700_decimal_compare.reference b/tests/queries/0_stateless/00700_decimal_compare.reference index 6b2787642b7..623c3161b24 100644 --- a/tests/queries/0_stateless/00700_decimal_compare.reference +++ b/tests/queries/0_stateless/00700_decimal_compare.reference @@ -1,5 +1,9 @@ 0 1 +0 +1 +0 +1 -42 -42 1 0 0 0 1 1 42 42 1 0 0 0 1 1 -42 -42.42 0 0 1 1 0 1 @@ -17,6 +21,8 @@ 42 42 42 42 42 42 42 42 42 +42 42 42 +42 42 42 -42 -42.42 -42 -42 42 42 42 42.42 -42 -42 -42.42 diff --git a/tests/queries/0_stateless/00700_decimal_compare.sql b/tests/queries/0_stateless/00700_decimal_compare.sql index ae2f5790570..41ff8b38102 100644 --- a/tests/queries/0_stateless/00700_decimal_compare.sql +++ b/tests/queries/0_stateless/00700_decimal_compare.sql @@ -17,8 +17,8 @@ CREATE TABLE IF NOT EXISTS decimal INSERT INTO decimal (a, b, c, d, e, f, g, h, i, j) VALUES (42, 42, 42, 0.42, 0.42, 0.42, 42.42, 42.42, 42.42, 42.42); INSERT INTO decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-42, -42, -42, -0.42, -0.42, -0.42, -42.42, -42.42, -42.42, -42.42); -SELECT a > toFloat64(0) FROM decimal; -- { serverError 43 } -SELECT g > toFloat32(0) FROM decimal; -- { serverError 43 } +SELECT a > toFloat64(0) FROM decimal ORDER BY a; +SELECT g > toFloat32(0) FROM decimal ORDER BY g; SELECT a > '0.0' FROM decimal ORDER BY a; SELECT a, b, a = b, a < b, a > b, a != b, a <= b, a >= b FROM decimal 
ORDER BY a; @@ -36,8 +36,8 @@ SELECT a, b, c FROM decimal WHERE a = toInt8(42) AND b = toInt8(42) AND c = toIn SELECT a, b, c FROM decimal WHERE a = toInt16(42) AND b = toInt16(42) AND c = toInt16(42); SELECT a, b, c FROM decimal WHERE a = toInt32(42) AND b = toInt32(42) AND c = toInt32(42); SELECT a, b, c FROM decimal WHERE a = toInt64(42) AND b = toInt64(42) AND c = toInt64(42); -SELECT a, b, c FROM decimal WHERE a = toFloat32(42); -- { serverError 43 } -SELECT a, b, c FROM decimal WHERE a = toFloat64(42); -- { serverError 43 } +SELECT a, b, c FROM decimal WHERE a = toFloat32(42); +SELECT a, b, c FROM decimal WHERE a = toFloat64(42); SELECT least(a, b), least(a, g), greatest(a, b), greatest(a, g) FROM decimal ORDER BY a; SELECT least(a, 0), least(b, 0), least(g, 0) FROM decimal ORDER BY a; diff --git a/tests/queries/0_stateless/01030_storage_hdfs_syntax.sql b/tests/queries/0_stateless/01030_storage_hdfs_syntax.sql index bfedcc56496..1fb6770d49b 100644 --- a/tests/queries/0_stateless/01030_storage_hdfs_syntax.sql +++ b/tests/queries/0_stateless/01030_storage_hdfs_syntax.sql @@ -3,7 +3,7 @@ drop table if exists test_table_hdfs_syntax ; create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('') -; -- { serverError 42 } +; -- { serverError 36 } create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('','','', '') ; -- { serverError 42 } drop table if exists test_table_hdfs_syntax diff --git a/tests/queries/0_stateless/01030_storage_url_syntax.sql b/tests/queries/0_stateless/01030_storage_url_syntax.sql index 0efb121eda5..e855383f0ac 100644 --- a/tests/queries/0_stateless/01030_storage_url_syntax.sql +++ b/tests/queries/0_stateless/01030_storage_url_syntax.sql @@ -1,7 +1,7 @@ drop table if exists test_table_url_syntax ; create table test_table_url_syntax (id UInt32) ENGINE = URL('') -; -- { serverError 42 } +; -- { serverError 36 } create table test_table_url_syntax (id UInt32) ENGINE = URL('','','','') ; -- { serverError 42 } drop table if exists test_table_url_syntax diff --git a/tests/queries/0_stateless/01039_test_setting_parse.reference b/tests/queries/0_stateless/01039_test_setting_parse.reference index 49233946390..199b64e7f4d 100644 --- a/tests/queries/0_stateless/01039_test_setting_parse.reference +++ b/tests/queries/0_stateless/01039_test_setting_parse.reference @@ -1,9 +1,9 @@ 1000000000 3221225472 1567000 -125952 +1263616 1567000 -125952 +1263616 12000000 32505856 1000000000000 diff --git a/tests/queries/0_stateless/01039_test_setting_parse.sql b/tests/queries/0_stateless/01039_test_setting_parse.sql index 6a4eadf6a40..fd8580d26a5 100644 --- a/tests/queries/0_stateless/01039_test_setting_parse.sql +++ b/tests/queries/0_stateless/01039_test_setting_parse.sql @@ -4,11 +4,11 @@ SET max_memory_usage = '3Gi'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; SET max_memory_usage = '1567k'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; -SET max_memory_usage = '123ki'; +SET max_memory_usage = '1234ki'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; SET max_memory_usage = '1567K'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; -SET max_memory_usage = '123Ki'; +SET max_memory_usage = '1234Ki'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; SET max_memory_usage = '12M'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; diff --git a/tests/queries/0_stateless/01282_system_parts_ttl_info.sql b/tests/queries/0_stateless/01282_system_parts_ttl_info.sql index 
dfa340636b3..ede5350ddd4 100644 --- a/tests/queries/0_stateless/01282_system_parts_ttl_info.sql +++ b/tests/queries/0_stateless/01282_system_parts_ttl_info.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS ttl; -CREATE TABLE ttl (d DateTime) ENGINE = MergeTree ORDER BY tuple() TTL d + INTERVAL 10 DAY; +CREATE TABLE ttl (d DateTime) ENGINE = MergeTree ORDER BY tuple() TTL d + INTERVAL 10 DAY SETTINGS remove_empty_parts=0; SYSTEM STOP MERGES ttl; INSERT INTO ttl VALUES ('2000-01-01 01:02:03'), ('2000-02-03 04:05:06'); SELECT rows, delete_ttl_info_min, delete_ttl_info_max, move_ttl_info.expression, move_ttl_info.min, move_ttl_info.max FROM system.parts WHERE database = currentDatabase() AND table = 'ttl'; diff --git a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh index 1412ea3be65..d995e3a1370 100755 --- a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh +++ b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh @@ -71,6 +71,7 @@ function test_distributed_push_down_limit_with_query_log() where event_date >= yesterday() and query_kind = 'Select' /* exclude DESC TABLE */ + and type = 'QueryFinish' and initial_query_id = '$query_id' and initial_query_id != query_id; " | xargs # convert new lines to spaces } diff --git a/tests/queries/0_stateless/02124_comparison_betwwen_decimal_and_float.reference b/tests/queries/0_stateless/02124_comparison_betwwen_decimal_and_float.reference new file mode 100644 index 00000000000..d718f64bb03 --- /dev/null +++ b/tests/queries/0_stateless/02124_comparison_betwwen_decimal_and_float.reference @@ -0,0 +1,34 @@ +0 +1 +0 +0 +1 +0 +0 +1 +0 +1 +0 +0 +1 +0 +0 +1 +1 +1 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 diff --git a/tests/queries/0_stateless/02124_comparison_betwwen_decimal_and_float.sql b/tests/queries/0_stateless/02124_comparison_betwwen_decimal_and_float.sql new file mode 100644 index 00000000000..d8dabbab552 --- /dev/null +++ b/tests/queries/0_stateless/02124_comparison_betwwen_decimal_and_float.sql @@ -0,0 +1,47 @@ +select CAST(1.0, 'Decimal(15,2)') > CAST(1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') = CAST(1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') < CAST(1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') != CAST(1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') > CAST(-1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') = CAST(-1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') < CAST(-1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') != CAST(-1, 'Float64'); +select CAST(1.0, 'Decimal(15,2)') > CAST(1, 'Float32'); +select CAST(1.0, 'Decimal(15,2)') = CAST(1, 'Float32'); +select CAST(1.0, 'Decimal(15,2)') < CAST(1, 'Float32'); +select CAST(1.0, 'Decimal(15,2)') != CAST(1, 'Float32'); +select CAST(1.0, 'Decimal(15,2)') > CAST(-1, 'Float32'); +select CAST(1.0, 'Decimal(15,2)') = CAST(-1, 'Float32'); +select CAST(1.0, 'Decimal(15,2)') < CAST(-1, 'Float32'); +select CAST(1.0, 'Decimal(15,2)') != CAST(-1, 'Float32'); + +SELECT toDecimal32('11.00', 2) > 1.; + +SELECT 0.1000000000000000055511151231257827021181583404541015625::Decimal256(70) = 0.1; + +DROP TABLE IF EXISTS t; + +CREATE TABLE t +( + d1 Decimal32(5), + d2 Decimal64(10), + d3 Decimal128(30), + d4 Decimal256(50), + f1 Float32, + f2 Float32 +)ENGINE = Memory; + +INSERT INTO t values (-1.5, -1.5, -1.5, -1.5, 1.5, 1.5); +INSERT INTO t values (1.5, 1.5, 1.5, 1.5, -1.5, -1.5); + +SELECT d1 > f1 FROM t ORDER BY f1; +SELECT d2 > f1 FROM t ORDER BY f1; +SELECT d3 > f1 FROM t ORDER BY f1; +SELECT d4 > 
f1 FROM t ORDER BY f1; + +SELECT d1 > f2 FROM t ORDER BY f2; +SELECT d2 > f2 FROM t ORDER BY f2; +SELECT d3 > f2 FROM t ORDER BY f2; +SELECT d4 > f2 FROM t ORDER BY f2; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh b/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh index 11c1522d10b..938b45fee98 100755 --- a/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh +++ b/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh @@ -26,6 +26,6 @@ GZDATA="H4sIAHTzuWEAA9VTuw3CMBB9+RCsyIULhFIwAC0SJQWZACkNi1CAxCCMwCCMQMEIKdkgPJ8P ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t1" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t1 ( x Int64, y Int64, z Int64 ) ENGINE = Memory" -echo ${GZDATA} | base64 --decode | gunzip | ${CLICKHOUSE_CLIENT} -q "INSERT INTO t1 FORMAT Arrow" 2>&1 | grep -qF "DUPLICATE_COLUMN" && echo 'OK' || echo 'FAIL' ||: +echo ${GZDATA} | base64 --decode | gunzip | ${CLICKHOUSE_CLIENT} -q "INSERT INTO t1 FORMAT Arrow settings input_format_arrow_allow_missing_columns = true" 2>&1 | grep -qF "DUPLICATE_COLUMN" && echo 'OK' || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t1" diff --git a/tests/queries/0_stateless/02154_default_keyword_insert.reference b/tests/queries/0_stateless/02154_default_keyword_insert.reference new file mode 100644 index 00000000000..158abdfd686 --- /dev/null +++ b/tests/queries/0_stateless/02154_default_keyword_insert.reference @@ -0,0 +1,3 @@ +0 42 33 +1 42 33 +2 33 33 diff --git a/tests/queries/0_stateless/02154_default_keyword_insert.sql b/tests/queries/0_stateless/02154_default_keyword_insert.sql new file mode 100644 index 00000000000..efccc35e6a8 --- /dev/null +++ b/tests/queries/0_stateless/02154_default_keyword_insert.sql @@ -0,0 +1,7 @@ +CREATE TEMPORARY TABLE IF NOT EXISTS default_table (x UInt32, y UInt32 DEFAULT 42, z UInt32 DEFAULT 33) ENGINE = Memory; + +INSERT INTO default_table(x) values (DEFAULT); +INSERT INTO default_table(x, z) values (1, DEFAULT); +INSERT INTO default_table values (2, 33, DEFAULT); + +SELECT * FROM default_table ORDER BY x; diff --git a/tests/queries/0_stateless/02155_default_keyword_format_values.reference b/tests/queries/0_stateless/02155_default_keyword_format_values.reference new file mode 100644 index 00000000000..022aeaff0a9 --- /dev/null +++ b/tests/queries/0_stateless/02155_default_keyword_format_values.reference @@ -0,0 +1,8 @@ +0 1 33 +0 42 33 +1 42 2 +2 42 33 +3 42 3 +4 42 3 +5 42 33 +6 6 33 diff --git a/tests/queries/0_stateless/02155_default_keyword_format_values.sh b/tests/queries/0_stateless/02155_default_keyword_format_values.sh new file mode 100755 index 00000000000..53f88747fdc --- /dev/null +++ b/tests/queries/0_stateless/02155_default_keyword_format_values.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "CREATE TABLE IF NOT EXISTS default_table (x UInt32, y UInt32 DEFAULT 42, z UInt32 DEFAULT 33) ENGINE = Memory;" + +echo "(DEFAULT, 1, DEFAULT), (1, DEFAULT, 2)" | $CLICKHOUSE_CLIENT --input_format_values_interpret_expressions=0 -q "INSERT INTO default_table FORMAT Values" +echo "(2, DEFAULT), (3, 3)" | $CLICKHOUSE_CLIENT --input_format_values_interpret_expressions=0 -q "INSERT INTO default_table(x, z) FORMAT Values" + +echo "(DEFAULT, DEFAULT, DEFAULT), (4, DEFAULT, 3)" | $CLICKHOUSE_CLIENT --input_format_values_interpret_expressions=1 -q "INSERT INTO default_table FORMAT Values" +echo "(5, DEFAULT), (6, 6)" | $CLICKHOUSE_CLIENT --input_format_values_interpret_expressions=1 -q "INSERT INTO default_table(x, y) FORMAT Values" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM default_table ORDER BY x, y"; +$CLICKHOUSE_CLIENT --query="DROP TABLE default_table" diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 6df60403ae0..2dc83f1eaa5 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -1,13 +1,13 @@ SELECT count() FROM t_02156_merge1 -PREWHERE k = 1 -WHERE (k = 1) AND notEmpty(v) +PREWHERE k = 3 +WHERE (k = 3) AND notEmpty(v) 2 SELECT count() FROM t_02156_merge2 -WHERE (k = 1) AND notEmpty(v) +WHERE (k = 3) AND notEmpty(v) 2 SELECT count() FROM t_02156_merge3 -WHERE (k = 1) AND notEmpty(v) +WHERE (k = 3) AND notEmpty(v) 2 diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index e033005e014..69fa9ac5ee2 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -20,14 +20,14 @@ INSERT INTO t_02156_mt1 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_mt2 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_log SELECT number, toString(number) FROM numbers(10000); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge1 WHERE k = 1 AND notEmpty(v); -SELECT count() FROM t_02156_merge1 WHERE k = 1 AND notEmpty(v); +EXPLAIN SYNTAX SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge2 WHERE k = 1 AND notEmpty(v); -SELECT count() FROM t_02156_merge2 WHERE k = 1 AND notEmpty(v); +EXPLAIN SYNTAX SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge3 WHERE k = 1 AND notEmpty(v); -SELECT count() FROM t_02156_merge3 WHERE k = 1 AND notEmpty(v); +EXPLAIN SYNTAX SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); DROP TABLE IF EXISTS t_02156_mt1; DROP TABLE IF EXISTS t_02156_mt2; diff --git a/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.reference b/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.reference deleted file mode 100644 index 3c26be9d9b2..00000000000 --- a/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.reference +++ /dev/null @@ -1,9 +0,0 @@ --360 --180.6 --180 --1 -0 -1 -180 -180.5 -360 diff --git a/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.sql 
b/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.sql deleted file mode 100644 index b30fc68725b..00000000000 --- a/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.sql +++ /dev/null @@ -1,21 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS h3_indexes; - - -CREATE TABLE h3_indexes (degrees Float64) ENGINE = Memory; - - -INSERT INTO h3_indexes VALUES (-1); -INSERT INTO h3_indexes VALUES (-180); -INSERT INTO h3_indexes VALUES (-180.6); -INSERT INTO h3_indexes VALUES (-360); -INSERT INTO h3_indexes VALUES (0); -INSERT INTO h3_indexes VALUES (1); -INSERT INTO h3_indexes VALUES (180); -INSERT INTO h3_indexes VALUES (180.5); -INSERT INTO h3_indexes VALUES (360); - -select h3RadsToDegs(h3DegsToRads(degrees)) from h3_indexes order by degrees; - -DROP TABLE h3_indexes; diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect new file mode 100755 index 00000000000..4f006b926bd --- /dev/null +++ b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect @@ -0,0 +1,21 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 20 +match_max 100000 + +expect_after { + eof { exp_continue } + timeout { exit 1 } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" + +expect ":) " +send -- "insert into table function null() format TSV some trash here 123 \n 456\r" +expect -re ".*DB::Exception: Table function 'null' requires 'structure'.*\r" +expect ":) " + +send -- "" +expect eof diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02165_auto_format_by_file_extension.reference b/tests/queries/0_stateless/02165_auto_format_by_file_extension.reference new file mode 100644 index 00000000000..ca3d2dd1d80 --- /dev/null +++ b/tests/queries/0_stateless/02165_auto_format_by_file_extension.reference @@ -0,0 +1,40 @@ +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +{ + "meta": + [ + { + "name": "id", + "type": "UInt64" + }, + { + "name": "name", + "type": "String" + } + ], + + "data": + [ + { + "id": "1", + "name": "one" + }, + { + "id": "2", + "name": "tow" + } + ], + + "rows": 2, diff --git a/tests/queries/0_stateless/02165_auto_format_by_file_extension.sh b/tests/queries/0_stateless/02165_auto_format_by_file_extension.sh new file mode 100755 index 00000000000..ba39f4b59bf --- /dev/null +++ b/tests/queries/0_stateless/02165_auto_format_by_file_extension.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/hello.csv ] && rm "${CLICKHOUSE_TMP}"/hello.csv +[ -e "${CLICKHOUSE_TMP}"/world.csv.gz ] && rm "${CLICKHOUSE_TMP}"/world.csv.gz +[ -e "${CLICKHOUSE_TMP}"/hello.world.csv ] && rm "${CLICKHOUSE_TMP}"/hello.world.csv +[ -e "${CLICKHOUSE_TMP}"/hello.world.csv.xz ] && rm "${CLICKHOUSE_TMP}"/hello.world.csv.xz +[ -e "${CLICKHOUSE_TMP}"/.htaccess.json ] && rm "${CLICKHOUSE_TMP}"/.htaccess.json +[ -e "${CLICKHOUSE_TMP}"/example.com. ] && rm "${CLICKHOUSE_TMP}"/example.com. 
+[ -e "${CLICKHOUSE_TMP}"/museum...protobuf ] && rm "${CLICKHOUSE_TMP}"/museum...protobuf + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_out_tb;" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE 02165_out_tb (id UInt64, name String) Engine=Memory;" +${CLICKHOUSE_CLIENT} --query "INSERT INTO 02165_out_tb Values(1, 'one'), (2, 'tow');" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE 02165_in_tb (id UInt64, name String) Engine=Memory;" + + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/hello.csv';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/hello.csv' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/world.csv.gz';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/world.csv.gz' COMPRESSION 'gz' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/hello.world.csv';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/hello.world.csv' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/hello.world.csv.xz';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/hello.world.csv.xz' COMPRESSION 'xz' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/example.com.';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/example.com.' FORMAT TabSeparated;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/museum...JSONEachRow';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/museum...JSONEachRow';" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/world.csv.gz';" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/.htaccess.json';" +head -n 26 ${CLICKHOUSE_TMP}/.htaccess.json + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_out_tb;" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_in_tb;" + +rm "${CLICKHOUSE_TMP}"/hello.csv +rm "${CLICKHOUSE_TMP}"/world.csv.gz +rm "${CLICKHOUSE_TMP}"/hello.world.csv +rm "${CLICKHOUSE_TMP}"/hello.world.csv.xz +rm "${CLICKHOUSE_TMP}"/.htaccess.json +rm "${CLICKHOUSE_TMP}"/example.com. 
+rm "${CLICKHOUSE_TMP}"/museum...JSONEachRow diff --git a/tests/queries/0_stateless/02165_insert_from_infile.reference b/tests/queries/0_stateless/02165_insert_from_infile.reference index e69de29bb2d..2a00a8faa31 100644 --- a/tests/queries/0_stateless/02165_insert_from_infile.reference +++ b/tests/queries/0_stateless/02165_insert_from_infile.reference @@ -0,0 +1,5 @@ +INSERT INTO test FROM INFILE data.file SELECT x +FROM input(\'x UInt32\') +INSERT INTO test FROM INFILE data.file WITH number AS x +SELECT number +FROM input(\'number UInt32\') diff --git a/tests/queries/0_stateless/02165_insert_from_infile.sql b/tests/queries/0_stateless/02165_insert_from_infile.sql index 1979c618c06..8cc851fa4e5 100644 --- a/tests/queries/0_stateless/02165_insert_from_infile.sql +++ b/tests/queries/0_stateless/02165_insert_from_infile.sql @@ -1,4 +1,4 @@ -EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' SELECT 1; -- { clientError SYNTAX_ERROR } +EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' SELECT x from input('x UInt32') FORMAT TSV; EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' WATCH view; -- { clientError SYNTAX_ERROR } EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' VALUES (1) -- { clientError SYNTAX_ERROR } -EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' WITH number AS x SELECT number FROM numbers(10); -- { clientError SYNTAX_ERROR } +EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' WITH number AS x SELECT number FROM input('number UInt32'); diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference new file mode 100644 index 00000000000..46f448cfba7 --- /dev/null +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference @@ -0,0 +1 @@ +x LowCardinality(UInt64) diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh new file mode 100755 index 00000000000..e560dc10d2c --- /dev/null +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1" + +$CLICKHOUSE_CLIENT -q "desc file('arrow.dict', 'Arrow')" + diff --git a/tests/queries/0_stateless/02167_format_from_file_extension.reference b/tests/queries/0_stateless/02167_format_from_file_extension.reference new file mode 100644 index 00000000000..b20551a19b9 --- /dev/null +++ b/tests/queries/0_stateless/02167_format_from_file_extension.reference @@ -0,0 +1,56 @@ +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 diff --git a/tests/queries/0_stateless/02167_format_from_file_extension.sh b/tests/queries/0_stateless/02167_format_from_file_extension.sh new file mode 100755 index 00000000000..a5902935450 --- /dev/null +++ b/tests/queries/0_stateless/02167_format_from_file_extension.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes +do + $CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)" + $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format')" + $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format', '$format')" +done + +$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'auto', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'RowBinary', 'x UInt64')" + +$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson')" +$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson', 'JSONEachRow', 'x UInt64')" + +$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack') settings input_format_msgpack_number_of_columns=1" +$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack', 'MsgPack', 'x UInt64')" + diff --git a/tests/queries/0_stateless/02168_avro_bug.reference b/tests/queries/0_stateless/02168_avro_bug.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02168_avro_bug.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02168_avro_bug.sql b/tests/queries/0_stateless/02168_avro_bug.sql new file mode 100644 index 00000000000..78eedf3258e --- /dev/null +++ b/tests/queries/0_stateless/02168_avro_bug.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest +insert into table function file('data.avro', 'Avro', 'x UInt64') select * from numbers(10); +insert into table function file('data.avro', 'Avro', 'x UInt64') select * from numbers(10); +insert into table function file('data.avro', 'Avro', 'x UInt64') select * from numbers(10); +select 'OK'; diff --git a/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.reference b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.reference new file mode 100644 index 00000000000..32c54e3eeea --- /dev/null +++ b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.reference @@ -0,0 +1,12 @@ +5 +6 +7 +8 +9 +10 +0 +1 +2 +3 +4 +5 diff --git a/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.sql b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.sql new file mode 100644 index 00000000000..8ac88ebc5c0 --- /dev/null +++ b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS counter; +CREATE TABLE counter (id UInt64, createdAt DateTime) ENGINE = MergeTree() ORDER BY id; +INSERT INTO counter SELECT number, now() FROM numbers(500); + +DROP TABLE IF EXISTS vcounter; +CREATE VIEW vcounter AS SELECT intDiv(id, 10) AS tens, max(createdAt) AS maxid FROM counter GROUP BY tens; + +SELECT tens FROM vcounter ORDER BY tens ASC LIMIT 100 SETTINGS limit = 6, offset = 5; + +SELECT tens FROM vcounter ORDER BY tens ASC LIMIT 100 SETTINGS limit = 6, offset = 0; +DROP TABLE vcounter; +DROP TABLE counter; diff --git a/tests/queries/0_stateless/02175_distributed_join_current_database.reference 
b/tests/queries/0_stateless/02175_distributed_join_current_database.reference new file mode 100644 index 00000000000..cfa2db0708e --- /dev/null +++ b/tests/queries/0_stateless/02175_distributed_join_current_database.reference @@ -0,0 +1,14 @@ +-- { echoOn } +select * from dist_02175 l join local_02175 r using dummy; +0 +0 +select * from dist_02175 l global join local_02175 r using dummy; +0 +0 +-- explicit database for distributed table +select * from remote('127.1', currentDatabase(), dist_02175) l join local_02175 r using dummy; +0 +0 +select * from remote('127.1', currentDatabase(), dist_02175) l global join local_02175 r using dummy; +0 +0 diff --git a/tests/queries/0_stateless/02175_distributed_join_current_database.sql b/tests/queries/0_stateless/02175_distributed_join_current_database.sql new file mode 100644 index 00000000000..94b949df0de --- /dev/null +++ b/tests/queries/0_stateless/02175_distributed_join_current_database.sql @@ -0,0 +1,19 @@ +-- Tags: shard + +drop table if exists local_02175; +drop table if exists dist_02175; + +create table local_02175 engine=Memory() as select * from system.one; +create table dist_02175 as local_02175 engine=Distributed(test_cluster_two_shards, currentDatabase(), local_02175); + +-- { echoOn } +select * from dist_02175 l join local_02175 r using dummy; +select * from dist_02175 l global join local_02175 r using dummy; + +-- explicit database for distributed table +select * from remote('127.1', currentDatabase(), dist_02175) l join local_02175 r using dummy; +select * from remote('127.1', currentDatabase(), dist_02175) l global join local_02175 r using dummy; + +-- { echoOff } +drop table local_02175; +drop table dist_02175; diff --git a/tests/queries/0_stateless/02176_dict_get_has_implicit_key_cast.reference b/tests/queries/0_stateless/02176_dict_get_has_implicit_key_cast.reference new file mode 100644 index 00000000000..d48bdc315ec --- /dev/null +++ b/tests/queries/0_stateless/02176_dict_get_has_implicit_key_cast.reference @@ -0,0 +1,14 @@ +Value +Value +Value +1 +1 +1 +Value +Value +Value +Value +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02176_dict_get_has_implicit_key_cast.sql b/tests/queries/0_stateless/02176_dict_get_has_implicit_key_cast.sql new file mode 100644 index 00000000000..54f5c12cdb4 --- /dev/null +++ b/tests/queries/0_stateless/02176_dict_get_has_implicit_key_cast.sql @@ -0,0 +1,67 @@ +DROP TABLE IF EXISTS 02176_test_simple_key_table; +CREATE TABLE 02176_test_simple_key_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 02176_test_simple_key_table VALUES (0, 'Value'); + +DROP DICTIONARY IF EXISTS 02176_test_simple_key_dictionary; +CREATE DICTIONARY 02176_test_simple_key_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE '02176_test_simple_key_table')) +LAYOUT(DIRECT()); + +SELECT dictGet('02176_test_simple_key_dictionary', 'value', toUInt64(0)); +SELECT dictGet('02176_test_simple_key_dictionary', 'value', toUInt8(0)); +SELECT dictGet('02176_test_simple_key_dictionary', 'value', '0'); +SELECT dictGet('02176_test_simple_key_dictionary', 'value', [0]); --{serverError 43} + +SELECT dictHas('02176_test_simple_key_dictionary', toUInt64(0)); +SELECT dictHas('02176_test_simple_key_dictionary', toUInt8(0)); +SELECT dictHas('02176_test_simple_key_dictionary', '0'); +SELECT dictHas('02176_test_simple_key_dictionary', [0]); --{serverError 43} + +DROP DICTIONARY 02176_test_simple_key_dictionary; +DROP TABLE 02176_test_simple_key_table; + +DROP TABLE IF EXISTS 
02176_test_complex_key_table; +CREATE TABLE 02176_test_complex_key_table +( + id UInt64, + id_key String, + value String +) ENGINE=TinyLog; + +INSERT INTO 02176_test_complex_key_table VALUES (0, '0', 'Value'); + +DROP DICTIONARY IF EXISTS 02176_test_complex_key_dictionary; +CREATE DICTIONARY 02176_test_complex_key_dictionary +( + id UInt64, + id_key String, + value String +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(TABLE '02176_test_complex_key_table')) +LAYOUT(COMPLEX_KEY_DIRECT()); + +SELECT dictGet('02176_test_complex_key_dictionary', 'value', tuple(toUInt64(0), '0')); +SELECT dictGet('02176_test_complex_key_dictionary', 'value', tuple(toUInt8(0), '0')); +SELECT dictGet('02176_test_complex_key_dictionary', 'value', tuple('0', '0')); +SELECT dictGet('02176_test_complex_key_dictionary', 'value', tuple([0], '0')); --{serverError 43} +SELECT dictGet('02176_test_complex_key_dictionary', 'value', tuple(toUInt64(0), 0)); + +SELECT dictHas('02176_test_complex_key_dictionary', tuple(toUInt64(0), '0')); +SELECT dictHas('02176_test_complex_key_dictionary', tuple(toUInt8(0), '0')); +SELECT dictHas('02176_test_complex_key_dictionary', tuple('0', '0')); +SELECT dictHas('02176_test_complex_key_dictionary', tuple([0], '0')); --{serverError 43} +SELECT dictHas('02176_test_complex_key_dictionary', tuple(toUInt64(0), 0)); + +DROP DICTIONARY 02176_test_complex_key_dictionary; +DROP TABLE 02176_test_complex_key_table; diff --git a/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.reference b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.reference new file mode 100644 index 00000000000..ae3a2b909c5 --- /dev/null +++ b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.reference @@ -0,0 +1,6 @@ +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-04 +1 +1 diff --git a/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql new file mode 100644 index 00000000000..7f62e187241 --- /dev/null +++ b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql @@ -0,0 +1,17 @@ +SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'UTC')); +SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Europe/Moscow')); +SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Canada/Atlantic')); +SELECT toStartOfWeek(toDateTime('1970-01-04 00:00:00')); + + +DROP TABLE IF EXISTS t02176; +CREATE TABLE t02176(timestamp DateTime) ENGINE = MergeTree +PARTITION BY toStartOfWeek(timestamp) +ORDER BY tuple(); + +INSERT INTO t02176 VALUES (1559952000); + +SELECT count() FROM t02176 WHERE timestamp >= toDateTime('1970-01-01 00:00:00'); +SELECT count() FROM t02176 WHERE identity(timestamp) >= toDateTime('1970-01-01 00:00:00'); + +DROP TABLE t02176; diff --git a/tests/queries/0_stateless/02177_temporary_table_current_database_http_session.reference b/tests/queries/0_stateless/02177_temporary_table_current_database_http_session.reference new file mode 100644 index 00000000000..78f5d214b4d --- /dev/null +++ b/tests/queries/0_stateless/02177_temporary_table_current_database_http_session.reference @@ -0,0 +1 @@ +test_02177 diff --git a/tests/queries/0_stateless/02177_temporary_table_current_database_http_session.sh b/tests/queries/0_stateless/02177_temporary_table_current_database_http_session.sh new file mode 100755 index 00000000000..abc5fb49750 --- /dev/null +++ b/tests/queries/0_stateless/02177_temporary_table_current_database_http_session.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-parallel + 
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATABASE='test_02177' +SESSION_ID="$RANDOM$RANDOM$RANDOM" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP DATABASE IF EXISTS ${DATABASE}" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE DATABASE ${DATABASE}" + +CLICKHOUSE_URL_PARAMS="database=${DATABASE}" +[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}" +CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?${CLICKHOUSE_URL_PARAMS}" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'CREATE TEMPORARY TABLE t AS SELECT currentDatabase()' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'SELECT * FROM t' + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP DATABASE ${DATABASE}" diff --git a/tests/queries/0_stateless/02178_column_function_insert_from.reference b/tests/queries/0_stateless/02178_column_function_insert_from.reference new file mode 100644 index 00000000000..480bb5ef6e7 --- /dev/null +++ b/tests/queries/0_stateless/02178_column_function_insert_from.reference @@ -0,0 +1,2 @@ +1 0 ['1'] +1 1 ['1'] diff --git a/tests/queries/0_stateless/02178_column_function_insert_from.sql b/tests/queries/0_stateless/02178_column_function_insert_from.sql new file mode 100644 index 00000000000..13d1ebb4788 --- /dev/null +++ b/tests/queries/0_stateless/02178_column_function_insert_from.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS TESTTABLE; + +CREATE TABLE TESTTABLE ( + _id UInt64, pt String, attr_list Array(String) +) ENGINE = MergeTree() PARTITION BY (pt) ORDER BY tuple(); + +INSERT INTO TESTTABLE values (0,'0',['1']), (1,'1',['1']); + +SET max_threads = 1; + +SELECT attr, _id, arrayFilter(x -> (x IN (select '1')), attr_list) z +FROM TESTTABLE ARRAY JOIN z AS attr ORDER BY _id LIMIT 3 BY attr; + +DROP TABLE TESTTABLE; diff --git a/utils/changelog/README.md b/utils/changelog/README.md index 28135d088ac..cd8f8da9b61 100644 --- a/utils/changelog/README.md +++ b/utils/changelog/README.md @@ -5,7 +5,7 @@ Generate github token: Dependencies: ``` - apt-get install git curl jq python3 python3-fuzzywuzzy +sudo apt-get install git curl jq python3 python3-fuzzywuzzy ``` Update information about tags: diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 22b5faa0fcb..6170f2ee28a 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -256,7 +256,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | # Trailing whitespaces find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | grep -vP $EXCLUDE_DIRS | - xargs grep -P ' $' | grep -P '.' && echo "^ Trailing whitespaces." + xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces." 
# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
diff --git a/utils/check-style/check-workflows b/utils/check-style/check-workflows
new file mode 100644
index 00000000000..1a0c0f9ecbe
--- /dev/null
+++ b/utils/check-style/check-workflows
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+act --list 1>/dev/null 2>&1 || act --list 2>&1
+
+actionlint
diff --git a/website/blog/en/2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md b/website/blog/en/2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
similarity index 98%
rename from website/blog/en/2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
rename to website/blog/en/2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
index 3f38d31b2f7..45a4628ed3a 100644
--- a/website/blog/en/2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
+++ b/website/blog/en/2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
@@ -1,6 +1,6 @@
 ---
 title: 'Admixer Aggregates Over 1 Billion Unique Users a Day using ClickHouse'
-image: 'https://blog-images.clickhouse.com/en/2022/admixer-case-study/featured.jpg'
+image: 'https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/featured.jpg'
 date: '2022-01-11'
 author: 'Vladimir Zakrevsky'
 tags: ['company']
@@ -44,7 +44,7 @@ Thus we needed to:
 * Be able to scale the data warehouse as the number of requests grew;
 * Have full control over our costs.

-![Profile Report](https://blog-images.clickhouse.com/en/2022/admixer-case-study/profile-report.png)
+![Profile Report](https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/profile-report.png)

 This image shows the Profile Report. Any Ad Campaign in Admixer is split by Line Items (Profiles). It is possible to view detailed reports for each Profile, including Date-Time Statistics, Geo, Domains, and SSPs. This report is also updated in real time.

@@ -69,11 +69,11 @@ ClickHouse helps to cope with the challenges above and provides the following be
 Our architecture changed from 2016 to 2020. There are two diagrams below: the state we started in and the state we came to.

-![Architecture 2016](https://blog-images.clickhouse.com/en/2022/admixer-case-study/architecture-2016.png) +![Architecture 2016](https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/architecture-2016.png) _Architecture 2016_ -![Architecture 2020](https://blog-images.clickhouse.com/en/2022/admixer-case-study/architecture-2020.png) +![Architecture 2020](https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/architecture-2020.png) _Architecture 2020_ @@ -131,5 +131,3 @@ Today, the company has over 100 supply and demand partners, 3,000+ customers, an For more information please visit: [https://admixer.com/](https://admixer.com/) - - diff --git a/website/blog/en/redirects.txt b/website/blog/en/redirects.txt index 80a57d38ebc..2e4cd6e2dd4 100644 --- a/website/blog/en/redirects.txt +++ b/website/blog/en/redirects.txt @@ -30,3 +30,4 @@ clickhouse-at-percona-live-2019.md 2019/clickhouse-at-percona-live-2019.md clickhouse-meetup-in-madrid-on-april-2-2019.md 2019/clickhouse-meetup-in-madrid-on-april-2-2019.md clickhouse-meetup-in-beijing-on-june-8-2019.md 2019/clickhouse-meetup-in-beijing-on-june-8-2019.md five-methods-for-database-obfuscation.md 2020/five-methods-for-database-obfuscation.md +2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md 2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
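The 02165 and 02167 tests above all pin down the same new default: when FORMAT (and COMPRESSION) are omitted from INTO OUTFILE, FROM INFILE, or file(), they are inferred from the file name. A minimal out-of-harness sketch of that behavior, assuming a locally running server; the /tmp/demo.csv path and the demo table are hypothetical, not part of the patch:

```
# INTO OUTFILE refuses to overwrite, so clear any leftover file first.
rm -f /tmp/demo.csv

# FORMAT is omitted: CSV is inferred from the .csv extension on write.
clickhouse-client --query "SELECT 1 AS id, 'one' AS name INTO OUTFILE '/tmp/demo.csv'"

# On read, the format is inferred from the extension the same way
# (and a .gz/.xz suffix would additionally select the decompression method).
clickhouse-client --query "CREATE TABLE IF NOT EXISTS demo (id UInt64, name String) ENGINE = Memory"
clickhouse-client --query "INSERT INTO demo FROM INFILE '/tmp/demo.csv'"
clickhouse-client --query "SELECT * FROM demo"
```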