diff --git a/.github/ISSUE_TEMPLATE/96_installation-issues.md b/.github/ISSUE_TEMPLATE/96_installation-issues.md index c322ccc92ce..e4be8af86b6 100644 --- a/.github/ISSUE_TEMPLATE/96_installation-issues.md +++ b/.github/ISSUE_TEMPLATE/96_installation-issues.md @@ -7,6 +7,8 @@ assignees: '' --- +**I have tried the following solutions**: https://clickhouse.com/docs/en/faq/troubleshooting/#troubleshooting-installation-errors + **Installation type** Packages, docker, single binary, curl? diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d3580f4c4d7..c1562d933a9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1157,7 +1157,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1194,7 +1194,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseDatabaseReplicated2: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_database_replicated + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, DatabaseReplicated) + REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseDatabaseReplicated3: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_database_replicated + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, DatabaseReplicated) + REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1253,7 +1327,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - FunctionalStatelessTestReleaseS3: + FunctionalStatelessTestReleaseS3_0: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] steps: @@ -1265,6 +1339,45 @@ jobs: CHECK_NAME=Stateless tests (release, s3 storage) REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseS3_1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1301,7 +1414,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1338,7 +1451,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1375,7 +1488,118 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug3: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug4: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug5: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1412,7 +1636,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1449,7 +1673,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1486,7 +1710,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Tsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Tsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1558,7 +1856,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1595,7 +1893,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (asan) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (asan) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1632,7 +2004,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1669,7 +2041,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1706,7 +2078,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1730,7 +2102,81 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - FunctionalStatelessTestUBsan: + FunctionalStatelessTestTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestTsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestUBsan0: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] steps: @@ -1742,6 +2188,45 @@ jobs: CHECK_NAME=Stateless tests (ubsan) REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestUBsan1: + needs: [BuilderDebUBsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_ubsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (ubsan) + REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1778,7 +2263,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1815,7 +2300,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1852,7 +2337,118 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan3: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan4: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan5: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1889,7 +2485,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1926,7 +2522,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1963,7 +2559,81 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestDebug3: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestDebug4: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2681,7 +3351,7 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2717,7 +3387,7 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2753,7 +3423,115 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2789,7 +3567,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2825,7 +3603,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2861,7 +3639,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2897,7 +3675,79 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=3 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsTsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (tsan) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsTsan5: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (tsan) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2933,7 +3783,7 @@ jobs: CHECK_NAME=Integration tests (release) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -2969,7 +3819,79 @@ jobs: CHECK_NAME=Integration tests (release) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsRelease2: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsRelease3: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -3584,20 +4506,32 @@ jobs: - FunctionalStatelessTestDebug0 - FunctionalStatelessTestDebug1 - FunctionalStatelessTestDebug2 + - FunctionalStatelessTestDebug3 + - FunctionalStatelessTestDebug4 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseReplicated0 - FunctionalStatelessTestReleaseDatabaseReplicated1 + - FunctionalStatelessTestReleaseDatabaseReplicated2 + - FunctionalStatelessTestReleaseDatabaseReplicated3 - FunctionalStatelessTestReleaseWideParts - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 + - FunctionalStatelessTestAsan2 + - FunctionalStatelessTestAsan3 - FunctionalStatelessTestTsan0 - FunctionalStatelessTestTsan1 - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestTsan3 + - FunctionalStatelessTestTsan4 - FunctionalStatelessTestMsan0 - FunctionalStatelessTestMsan1 - FunctionalStatelessTestMsan2 - - FunctionalStatelessTestUBsan + - FunctionalStatelessTestMsan3 + - FunctionalStatelessTestMsan4 + - FunctionalStatelessTestMsan5 + - FunctionalStatelessTestUBsan0 + - FunctionalStatelessTestUBsan1 - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease - FunctionalStatefulTestAarch64 @@ -3605,13 +4539,17 @@ jobs: - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan - FunctionalStatefulTestUBsan - - FunctionalStatelessTestReleaseS3 + - FunctionalStatelessTestReleaseS3_0 + - FunctionalStatelessTestReleaseS3_1 - FunctionalStatelessTestS3Debug0 - FunctionalStatelessTestS3Debug1 - FunctionalStatelessTestS3Debug2 + - FunctionalStatelessTestS3Debug4 + - FunctionalStatelessTestS3Debug5 - FunctionalStatelessTestS3Tsan0 - FunctionalStatelessTestS3Tsan1 - FunctionalStatelessTestS3Tsan2 + - FunctionalStatelessTestS3Tsan4 - StressTestDebug - StressTestAsan - StressTestTsan @@ -3625,12 +4563,19 @@ jobs: - IntegrationTestsAsan0 - IntegrationTestsAsan1 - IntegrationTestsAsan2 + - IntegrationTestsAsan3 + - IntegrationTestsAsan4 + - IntegrationTestsAsan5 - IntegrationTestsRelease0 - IntegrationTestsRelease1 + - IntegrationTestsRelease2 + - IntegrationTestsRelease3 - IntegrationTestsTsan0 - IntegrationTestsTsan1 - IntegrationTestsTsan2 - IntegrationTestsTsan3 + - IntegrationTestsTsan4 + - IntegrationTestsTsan5 - PerformanceComparisonX86-0 - PerformanceComparisonX86-1 - PerformanceComparisonX86-2 diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index a9172a8a2e2..e03e5c543c2 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -38,7 +38,7 @@ jobs: with: ref: master fetch-depth: 0 - - name: Generate versions + - name: Update versions, docker version, changelog, security env: GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} run: | @@ -51,6 +51,7 @@ jobs: --gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \ --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}" git add "./docs/changelogs/${GITHUB_TAG}.md" + python ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - name: Create Pull Request uses: peter-evans/create-pull-request@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index 68767612892..0e41894b8bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v22.11, 2022-11-17](#2211)**
**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
@@ -11,6 +12,109 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+### ClickHouse release 22.11, 2022-11-17 + +#### Backward Incompatible Change +* `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). + +#### New Feature +* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). This is controlled by the `insert_keeper_max_retries` setting, which is disabled by default. [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). +* Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). +* Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). +* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In the case where the types of Intervals are different, they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `**` glob support for recursive directory traversal of the filesystem and S3. Resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Introduce `s3_plain` disk type for write-once-read-many operations. Implement `ATTACH` of `MergeTree` table for `s3_plain` disk. [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). +* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). +* Add function `ascii` like in Apache Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). +* Add function `positive_modulo` (`pmod`) which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). +* Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). +* Add function `randCanonical`, which is similar to the `rand` function in Apache Spark or Impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). +* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). +* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). +* Add generic implementation for arbitrary structured named collections, access type and `system.named_collections`. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Performance Improvement +* Parallelized merging of `uniqExact` states for aggregation without key, i.e. queries like `SELECT uniqExact(number) FROM table`. The improvement becomes noticeable when the number of unique keys approaches 10^6. Also `uniq` performance is slightly optimized. [#43072](https://github.com/ClickHouse/ClickHouse/pull/43072) ([Nikita Taranov](https://github.com/nickitat)). +* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). +* Speed up AND and OR operators when they are sequenced. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Support parallel parsing for `LineAsString` input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). +* ClickHouse Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). +* A condition like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). + +#### Experimental Feature +* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). +* Ignore MySQL binlog SAVEPOINT event for MaterializedMySQL. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). + +#### Improvement +* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so that the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). +* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode. Resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). +* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). +* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to STDERR. It allows getting progress even if STDERR is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for `FixedString` input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). +* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). +* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). +* Refactor function `tokens` to enable max tokens returned for related functions (disabled by default). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). +* Allow to use `Date32` arguments for `formatDateTime` and `FROM_UNIXTIME` functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). +* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase the size of upload part exponentially for backup to S3 to avoid errors about max 10 000 parts limit of the multipart upload to s3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). +* When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). +* Add `oss` function and `OSS` table engine (this is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify the `INFORMATION_SCHEMA` tables in a way so that ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add some functions for compatibility with PowerBI, when it connects using MySQL protocol [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Better usability for Dashboard on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). + +#### Build/Testing/Packaging Improvement +* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). +* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). +* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). +* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove some libraries from Ubuntu Docker image. [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Updated normaliser to clone the alias ast. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix race for backup of tables in `Lazy` databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix for `skip_unavailable_shards`: it did not work with the `s3Cluster` table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). +* Fix schema inference in `s3Cluster` and improvement in `hdfsCluster`. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix retries while reading from URL table engines / table function. (retriable errors could be retries more times than needed, non-retriable errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A segmentation fault related to DNS & c-ares has been reported and fixed. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). +* Fix typo in a setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix creating a Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). +* `(U)Int128` and `(U)Int256` values were correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a bug in functions parser that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix the locking in `truncate table`. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). +* Fix possible crash in `web` disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). +* Fix stack-use-after-return under ASAN build in the Create User query parser. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `lowerUTF8`/`upperUTF8` in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). +* Additional bound check was added to LZ4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). +* Fix rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect behavior with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). +* A null pointer will be generated when select if as from ‘three table join’ , For example, this SQL query: [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix memory sanitizer report in Cluster Discovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). +* Improve DateTime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Fix ATTACH TABLE in `PostgreSQL` database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). +* Fix several buffer over-reads in deserialization of carefully crafted aggregate function states. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Fix function `if` in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix decimal math overflow in parsing DateTime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). +* The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed unexpected behaviour of `Interval` types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). + ### ClickHouse release 22.10, 2022-10-26 #### Backward Incompatible Change @@ -570,7 +674,7 @@ * Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). * Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). * Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). -* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). +* Remove subprocess run for Kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). * * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). * Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). * Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). @@ -850,8 +954,8 @@ #### Upgrade Notes -* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. +* Now, background merges, mutations, and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes to the metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant in this area. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. * `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. * Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/CMakeLists.txt b/CMakeLists.txt index d10bc63c15e..06e6f943fd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -442,8 +442,9 @@ elseif (OS_DARWIN) include(cmake/darwin/default_libs.cmake) elseif (OS_FREEBSD) include(cmake/freebsd/default_libs.cmake) +else() + link_libraries(global-group) endif () -link_libraries(global-group) if (NOT (OS_LINUX OR OS_DARWIN)) # Using system libs can cause a lot of warnings in includes (on macro expansion). @@ -592,7 +593,7 @@ add_subdirectory (programs) add_subdirectory (tests) add_subdirectory (utils) -include (cmake/sanitize_target_link_libraries.cmake) +include (cmake/sanitize_targets.cmake) # Build native targets if necessary get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS) diff --git a/SECURITY.md b/SECURITY.md index 0fb333c8ea3..a4f431d7552 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,3 +1,6 @@ + # Security Policy @@ -10,6 +13,7 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 22.11 | ✔️ | | 22.10 | ✔️ | | 22.9 | ✔️ | | 22.8 | ✔️ | @@ -61,5 +65,5 @@ As the security issue moves from triage, to identified fix, to release planning ## Public Disclosure Timing -A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. +A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index d10bc6ba723..bae03ad590a 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -220,13 +220,13 @@ struct statx { uint32_t stx_dev_minor; uint64_t spare[14]; }; -#endif int statx(int fd, const char *restrict path, int flag, unsigned int mask, struct statx *restrict statxbuf) { return syscall(SYS_statx, fd, path, flag, mask, statxbuf); } +#endif #include diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 11b37f5a7c8..d06d3918612 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54468) +SET(VERSION_REVISION 54469) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 11) +SET(VERSION_MINOR 12) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 98ab5a3c189232ea2a3dddb9d2be7196ae8b3434) -SET(VERSION_DESCRIBE v22.11.1.1-testing) -SET(VERSION_STRING 22.11.1.1) +SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77) +SET(VERSION_DESCRIBE v22.12.1.1-testing) +SET(VERSION_STRING 22.12.1.1) # end of autochange diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 1f92663a4b9..3e6e4907a71 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -23,6 +23,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) include (cmake/cxx.cmake) +link_libraries(global-group) target_link_libraries(global-group INTERFACE $ diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 65d5f0511d9..3e1f22ef2e4 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -24,6 +24,7 @@ find_package(Threads REQUIRED) include (cmake/unwind.cmake) include (cmake/cxx.cmake) +link_libraries(global-group) target_link_libraries(global-group INTERFACE $ diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 21bead7020c..23c5fc3e14f 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -34,6 +34,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +include (cmake/unwind.cmake) +include (cmake/cxx.cmake) + +# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies +# which are ok with static libraries but not with dynamic ones +link_libraries(global-group) + if (NOT OS_ANDROID) if (NOT USE_MUSL) # Our compatibility layer doesn't build under Android, many errors in musl. @@ -42,9 +49,6 @@ if (NOT OS_ANDROID) add_subdirectory(base/harmful) endif () -include (cmake/unwind.cmake) -include (cmake/cxx.cmake) - target_link_libraries(global-group INTERFACE -Wl,--start-group $ diff --git a/cmake/sanitize_target_link_libraries.cmake b/cmake/sanitize_targets.cmake similarity index 65% rename from cmake/sanitize_target_link_libraries.cmake rename to cmake/sanitize_targets.cmake index d66ea338a52..8f61da2009d 100644 --- a/cmake/sanitize_target_link_libraries.cmake +++ b/cmake/sanitize_targets.cmake @@ -1,3 +1,13 @@ +# https://stackoverflow.com/a/62311397/328260 +macro (get_all_targets_recursive targets dir) + get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach (subdir ${subdirectories}) + get_all_targets_recursive (${targets} ${subdir}) + endforeach () + get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + list (APPEND ${targets} ${current_targets}) +endmacro () + # When you will try to link target with the directory (that exists), cmake will # skip this without an error, only the following warning will be reported: # @@ -18,23 +28,12 @@ # -- but cannot be used with link_libraries() # - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize # -- this will work. - -# https://stackoverflow.com/a/62311397/328260 function (get_all_targets var) set (targets) get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}) set (${var} ${targets} PARENT_SCOPE) endfunction() -macro (get_all_targets_recursive targets dir) - get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) - foreach (subdir ${subdirectories}) - get_all_targets_recursive (${targets} ${subdir}) - endforeach () - get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) - list (APPEND ${targets} ${current_targets}) -endmacro () - -macro (sanitize_link_libraries target) +function (sanitize_link_libraries target) get_target_property(target_type ${target} TYPE) if (${target_type} STREQUAL "INTERFACE_LIBRARY") get_property(linked_libraries TARGET ${target} PROPERTY INTERFACE_LINK_LIBRARIES) @@ -48,9 +47,35 @@ macro (sanitize_link_libraries target) message(FATAL_ERROR "${target} requested to link with directory: ${linked_library}") endif() endforeach() -endmacro() - +endfunction() get_all_targets (all_targets) foreach (target ${all_targets}) sanitize_link_libraries(${target}) endforeach() + +# +# Do not allow to define -W* from contrib publically (INTERFACE/PUBLIC). +# +function (get_contrib_targets var) + set (targets) + get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}/contrib) + set (${var} ${targets} PARENT_SCOPE) +endfunction() +function (sanitize_interface_flags target) + get_target_property(target_type ${target} TYPE) + get_property(compile_definitions TARGET ${target} PROPERTY INTERFACE_COMPILE_DEFINITIONS) + get_property(compile_options TARGET ${target} PROPERTY INTERFACE_COMPILE_OPTIONS) + if (NOT "${compile_options}" STREQUAL "") + message(FATAL_ERROR "${target} set INTERFACE_COMPILE_OPTIONS to ${compile_options}. This is forbidden.") + endif() + if ("${compile_definitions}" MATCHES "-Wl,") + # linker option - OK + elseif ("${compile_definitions}" MATCHES "-W") + message(FATAL_ERROR "${target} contains ${compile_definitions} flags in INTERFACE_COMPILE_DEFINITIONS. This is forbidden.") + endif() +endfunction() +get_contrib_targets (contrib_targets) +foreach (contrib_target ${contrib_targets}) + sanitize_interface_flags(${contrib_target}) +endforeach() + diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 53c6ff58f83..8dc154e9d91 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -57,7 +57,7 @@ add_library(cxx ${SRCS}) set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") target_include_directories(cxx SYSTEM BEFORE PRIVATE $) -target_include_directories(cxx SYSTEM BEFORE PUBLIC $) +target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$:$>) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 8f1cf6ee98b..b717cec2d33 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.10.2.11" +ARG VERSION="22.11.1.1360" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index f50160321e1..8a5dc04681e 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.10.2.11" +ARG VERSION="22.11.1.1360" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 78f627bf45e..0afd6cc19f3 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -254,7 +254,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau start -./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" \ +./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \ || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv @@ -388,6 +388,11 @@ else rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: + # Turn on after 22.12 + rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: + # it uses recently introduced settings which previous versions may not have + rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: + start clickhouse-client --query="SELECT 'Server version: ', version()" @@ -448,6 +453,7 @@ else # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility echo "Check for Error messages in server log:" zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Code: 236. DB::Exception: Cancelled mutating parts" \ @@ -482,6 +488,7 @@ else -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ -e "Code: 269. DB::Exception: Destination table is myself" \ -e "Coordination::Exception: Connection loss" \ + -e "MutateFromLogEntryTask" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docs/changelogs/v22.11.1.1360-stable.md b/docs/changelogs/v22.11.1.1360-stable.md new file mode 100644 index 00000000000..77ad54b4fd8 --- /dev/null +++ b/docs/changelogs/v22.11.1.1360-stable.md @@ -0,0 +1,249 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.11.1.1360-stable (0d211ed1984) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189) + +#### Backward Incompatible Change +* JSONExtract family of functions will now attempt to coerce to the request type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). + +#### New Feature +* - Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). +* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Add Hudi and DeltaLake table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do)). +* Add 4LW command `csnp` for manually creating snapshots. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). +* Support for keeper request retries during insert into replicated merge trees. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). +* Add function ascii like in spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). +* Add function pmod which return non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). +* Published function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). +* Added S3 PUTs and GETs request per second rate throttling. Settings `s3_max_get_rps`, `s3_max_get_burst`, `s3_max_put_rps`, `s3_max_put_burst` are used to configure token bucket throttler. Can be used with both S3 ObjectStorage and S3 table function. Different limits can be configured for different S3 disks or endpoints. [#43014](https://github.com/ClickHouse/ClickHouse/pull/43014) ([Sergei Trifonov](https://github.com/serxa)). +* Add table function hudi and deltaLake. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). +* Add function factorial, as in Impala or Spark. [#43110](https://github.com/ClickHouse/ClickHouse/pull/43110) ([李扬](https://github.com/taiyang-li)). +* Add function randCanonical, which is similar to rand function in spark or impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). + +#### Performance Improvement +* Currently, the only saturable operators are And and Or, and their code paths are affected by this change. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). +* Fixed slowness in JSONExtract with LowCardinality(String) tuples. [#42761](https://github.com/ClickHouse/ClickHouse/pull/42761) ([AlfVII](https://github.com/AlfVII)). +* Support parallel parsing for LineAsString input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). +* Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). +* Parallelized merging of `uniqExact` states for aggregation without a key, i.e. queries like `SELECT uniqExact(number) FROM table`. The improvement becomes noticeable when the number of unique keys approaches 10^6. Also `uniq` performance is slightly optimized. This closes [#4510](https://github.com/ClickHouse/ClickHouse/issues/4510). [#43072](https://github.com/ClickHouse/ClickHouse/pull/43072) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). +* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). +* ClickHouse Client and ClickHouse Local will show progress by default even in non-interactive mode. If `/dev/tty` is available, the progress will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). +* - Add `notLike` to key condition atom map, so condition like `NOT LIKE 'prefix%'` can use primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). +* Add support for FixedString input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). +* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). +* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). +* Added ** glob support for recursive directory traversal to filesystem and S3. resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add a new variable call `limit` in query_info, indicating whether this query is a limit-trivial query. If so, we will adjust the approximate total rows for later estimation. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). +* Implement `ATTACH` of `MergeTree` table for `s3_plain` disk (plus some fixes for `s3_plain`). [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). +* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). +* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). +* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). +* Refactor FunctionTokens to enable max tokens returned for related functions(default disabled). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). +* Added new field allow_readonly in system.table_functions to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Allow to use Date32 arguments for formatDateTime and FROM_UNIXTIME functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). +* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase the size of upload part exponentially for backup to S3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). +* When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). +* bugfix: [#42856](https://github.com/ClickHouse/ClickHouse/issues/42856) ignore Mysql binlog SAVEPOINT event. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). +* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add generic implementation for arbitrary structured named collections, access type and system.named_collections. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). +* add oss function and StorageOSS (This is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify the `INFORMATION_SCHEMA` tables in a way so that now ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Disable `deltaLake` and `hudi` table functions in readonly mode. [#43316](https://github.com/ClickHouse/ClickHouse/pull/43316) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Bug Fix +* Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix race for backup of tables in Lazy databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). +* fix skip_unavailable_shards does not work using s3Cluster table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). + +#### Build/Testing/Packaging Improvement +* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). +* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). +* use llvm `l64.lld` in macOS suppress ld warnings, close [#42282](https://github.com/ClickHouse/ClickHouse/issues/42282). [#42470](https://github.com/ClickHouse/ClickHouse/pull/42470) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). +* * Improve bugfix validation check: fix bug with skipping the check, port separate status in CI, run after check labels and style check. Close [#40349](https://github.com/ClickHouse/ClickHouse/issues/40349). [#42702](https://github.com/ClickHouse/ClickHouse/pull/42702) ([Vladimir C](https://github.com/vdimir)). +* Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Fix schema inference in s3Cluster and improve in hdfsCluster. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix retries while reading from http table engines / table function. (retrtiable errors could be retries more times than needed, non-retrialble errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). +* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix create Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). +* `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). +* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). +* Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). +* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). +* Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* * Fix incorrect saved_block_sample with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). +* A null pointer will be generated when select if as from ‘three table join’ , For example, the SQL:. [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix memory sanitizer report in ClusterDiscovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). +* Fix datetime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Fixes for s3_plain disk that will allow to attach Wide parts. [#42950](https://github.com/ClickHouse/ClickHouse/pull/42950) ([Azat Khuzhin](https://github.com/azat)). +* Fix ATTACH TABLE in PostgreSQL database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). +* Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). +* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). +* - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Fix function if in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix decimal math overflow in parsing datetime with 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). +* The `indent` field produced by the `git-import` tool was miscalculated. See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed unexpected behaviour of Interval types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). +* * Fix logical error in `sumMap/minMap/maxMap` functions executing `TOTALS/ROLLUP/CUBE` on `NULL` values. Close [#43022](https://github.com/ClickHouse/ClickHouse/issues/43022). [#43232](https://github.com/ClickHouse/ClickHouse/pull/43232) ([Vladimir C](https://github.com/vdimir)). +* - Fix ubsan in AggregateFunctionMinMaxAny::read with high sizes. [#43249](https://github.com/ClickHouse/ClickHouse/pull/43249) ([Raúl Marín](https://github.com/Algunenano)). +* Fix IS (NOT) NULL operator priority in regard to other operators. [#43265](https://github.com/ClickHouse/ClickHouse/pull/43265) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### Build Improvement + +* ... Add support for format ipv6 on s390x. [#42412](https://github.com/ClickHouse/ClickHouse/pull/42412) ([Suzy Wang](https://github.com/SuzyWangIBMer)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Sonar Cloud Workflow"'. [#42725](https://github.com/ClickHouse/ClickHouse/pull/42725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert " Keeper retries during insert (clean)"'. [#43116](https://github.com/ClickHouse/ClickHouse/pull/43116) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert " Keeper retries during insert (clean)""'. [#43122](https://github.com/ClickHouse/ClickHouse/pull/43122) ([Igor Nikonov](https://github.com/devcrafter)). +* NO CL ENTRY: 'Revert "Optimize TTL merge, completely expired parts can be removed in time"'. [#43134](https://github.com/ClickHouse/ClickHouse/pull/43134) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Randomize keeper fault injection settings in stress tests"'. [#43218](https://github.com/ClickHouse/ClickHouse/pull/43218) ([Alexander Gololobov](https://github.com/davenger)). +* NO CL ENTRY: 'Revert "S3 request per second rate throttling"'. [#43306](https://github.com/ClickHouse/ClickHouse/pull/43306) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Better logging for docs builder [#41903](https://github.com/ClickHouse/ClickHouse/pull/41903) ([filimonov](https://github.com/filimonov)). +* Save full server log in AST Fuzzer checks [#42316](https://github.com/ClickHouse/ClickHouse/pull/42316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Build with libcxx(abi) 15 [#42513](https://github.com/ClickHouse/ClickHouse/pull/42513) ([Robert Schulze](https://github.com/rschu1ze)). +* Sonar Cloud Workflow [#42534](https://github.com/ClickHouse/ClickHouse/pull/42534) ([Julio Jimenez](https://github.com/juliojimenez)). +* Invalid type in where for Merge table (logical error) [#42576](https://github.com/ClickHouse/ClickHouse/pull/42576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix frequent memory drift message and clarify things in comments [#42582](https://github.com/ClickHouse/ClickHouse/pull/42582) ([Azat Khuzhin](https://github.com/azat)). +* Add functions for PowerBI connect [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Try to save `IDataPartStorage` interface [#42618](https://github.com/ClickHouse/ClickHouse/pull/42618) ([Anton Popov](https://github.com/CurtizJ)). +* Remove Ubuntu cruft [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer change setting into allow_experimental_analyzer [#42649](https://github.com/ClickHouse/ClickHouse/pull/42649) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer IQueryTreeNode remove getName method [#42651](https://github.com/ClickHouse/ClickHouse/pull/42651) ([Maksim Kita](https://github.com/kitaisreal)). +* Minor fix iotest_nonblock build [#42658](https://github.com/ClickHouse/ClickHouse/pull/42658) ([Jordi Villar](https://github.com/jrdi)). +* Add tests and doc for some url-related functions [#42664](https://github.com/ClickHouse/ClickHouse/pull/42664) ([Vladimir C](https://github.com/vdimir)). +* Update version_date.tsv and changelogs after v22.10.1.1875-stable [#42676](https://github.com/ClickHouse/ClickHouse/pull/42676) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix error handling in clickhouse_helper.py [#42678](https://github.com/ClickHouse/ClickHouse/pull/42678) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix execution of version_helper.py to use git tweaks [#42679](https://github.com/ClickHouse/ClickHouse/pull/42679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* MergeTree indexes use RPNBuilderTree [#42681](https://github.com/ClickHouse/ClickHouse/pull/42681) ([Maksim Kita](https://github.com/kitaisreal)). +* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Support optimize_syntax_fuse_functions for sum/count/avg via analyzer [#42689](https://github.com/ClickHouse/ClickHouse/pull/42689) ([Vladimir C](https://github.com/vdimir)). +* Update version after release [#42699](https://github.com/ClickHouse/ClickHouse/pull/42699) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.10.1.1877-stable [#42700](https://github.com/ClickHouse/ClickHouse/pull/42700) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* OrderByLimitByDuplicateEliminationPass improve performance [#42704](https://github.com/ClickHouse/ClickHouse/pull/42704) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer improve subqueries representation [#42705](https://github.com/ClickHouse/ClickHouse/pull/42705) ([Maksim Kita](https://github.com/kitaisreal)). +* Update version_date.tsv and changelogs after v22.9.4.32-stable [#42712](https://github.com/ClickHouse/ClickHouse/pull/42712) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.7.34-lts [#42713](https://github.com/ClickHouse/ClickHouse/pull/42713) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.7.7.24-stable [#42714](https://github.com/ClickHouse/ClickHouse/pull/42714) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Move SonarCloud Job to nightly [#42718](https://github.com/ClickHouse/ClickHouse/pull/42718) ([Julio Jimenez](https://github.com/juliojimenez)). +* Update version_date.tsv and changelogs after v22.8.8.3-lts [#42738](https://github.com/ClickHouse/ClickHouse/pull/42738) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Minor fix implicit cast CaresPTRResolver [#42747](https://github.com/ClickHouse/ClickHouse/pull/42747) ([Jordi Villar](https://github.com/jrdi)). +* Fix build on master [#42752](https://github.com/ClickHouse/ClickHouse/pull/42752) ([Igor Nikonov](https://github.com/devcrafter)). +* Update version_date.tsv and changelogs after v22.3.14.18-lts [#42759](https://github.com/ClickHouse/ClickHouse/pull/42759) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix anchor links [#42760](https://github.com/ClickHouse/ClickHouse/pull/42760) ([Sergei Trifonov](https://github.com/serxa)). +* Update version_date.tsv and changelogs after v22.3.14.23-lts [#42764](https://github.com/ClickHouse/ClickHouse/pull/42764) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update README.md [#42783](https://github.com/ClickHouse/ClickHouse/pull/42783) ([Yuko Takagi](https://github.com/yukotakagi)). +* Slightly better code with projections [#42794](https://github.com/ClickHouse/ClickHouse/pull/42794) ([Anton Popov](https://github.com/CurtizJ)). +* Fix some races in MergeTree [#42805](https://github.com/ClickHouse/ClickHouse/pull/42805) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix typo in comments [#42809](https://github.com/ClickHouse/ClickHouse/pull/42809) ([Gabriel](https://github.com/Gabriel39)). +* Fix compilation of LLVM with cmake cache [#42816](https://github.com/ClickHouse/ClickHouse/pull/42816) ([Azat Khuzhin](https://github.com/azat)). +* Fix link in docs [#42821](https://github.com/ClickHouse/ClickHouse/pull/42821) ([Sergei Trifonov](https://github.com/serxa)). +* Link to proper place in docs [#42822](https://github.com/ClickHouse/ClickHouse/pull/42822) ([Sergei Trifonov](https://github.com/serxa)). +* Fix argument type check in AggregateFunctionAnalysisOfVariance [#42823](https://github.com/ClickHouse/ClickHouse/pull/42823) ([Vladimir C](https://github.com/vdimir)). +* Tests/lambda analyzer [#42824](https://github.com/ClickHouse/ClickHouse/pull/42824) ([Denny Crane](https://github.com/den-crane)). +* Fix Missing Quotes - Sonar Nightly [#42831](https://github.com/ClickHouse/ClickHouse/pull/42831) ([Julio Jimenez](https://github.com/juliojimenez)). +* Add exclusions from the Snyk scan [#42834](https://github.com/ClickHouse/ClickHouse/pull/42834) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Missing Env Vars - Sonar Nightly [#42843](https://github.com/ClickHouse/ClickHouse/pull/42843) ([Julio Jimenez](https://github.com/juliojimenez)). +* Fix typo [#42855](https://github.com/ClickHouse/ClickHouse/pull/42855) ([GoGoWen](https://github.com/GoGoWen)). +* Add timezone to 02458_datediff_date32 [#42857](https://github.com/ClickHouse/ClickHouse/pull/42857) ([Vladimir C](https://github.com/vdimir)). +* Adjust cancel and rerun workflow names to the actual [#42862](https://github.com/ClickHouse/ClickHouse/pull/42862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer subquery in JOIN TREE with aggregation [#42865](https://github.com/ClickHouse/ClickHouse/pull/42865) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix getauxval for sanitizer builds [#42866](https://github.com/ClickHouse/ClickHouse/pull/42866) ([Amos Bird](https://github.com/amosbird)). +* Update version_date.tsv and changelogs after v22.10.2.11-stable [#42871](https://github.com/ClickHouse/ClickHouse/pull/42871) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Better usability for dashboard.html on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). +* Some fixes for ReplicatedMergeTree [#42878](https://github.com/ClickHouse/ClickHouse/pull/42878) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Validate Query Tree in debug [#42879](https://github.com/ClickHouse/ClickHouse/pull/42879) ([Dmitry Novik](https://github.com/novikd)). +* changed type name for s3 plain storage [#42890](https://github.com/ClickHouse/ClickHouse/pull/42890) ([Aleksandr](https://github.com/AVMusorin)). +* Cleanup implementation of regexpReplace(All|One) [#42907](https://github.com/ClickHouse/ClickHouse/pull/42907) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not show status for Bugfix validate check in non bugfix PRs [#42932](https://github.com/ClickHouse/ClickHouse/pull/42932) ([Vladimir C](https://github.com/vdimir)). +* fix(typo): Passible -> Possible [#42933](https://github.com/ClickHouse/ClickHouse/pull/42933) ([Yakko Majuri](https://github.com/yakkomajuri)). +* Pin the cryptography version to not break lambdas [#42934](https://github.com/ClickHouse/ClickHouse/pull/42934) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix: bad cast from type DB::ColumnLowCardinality to DB::ColumnString [#42937](https://github.com/ClickHouse/ClickHouse/pull/42937) ([Igor Nikonov](https://github.com/devcrafter)). +* Attach thread pool for loading parts to the query [#42947](https://github.com/ClickHouse/ClickHouse/pull/42947) ([Azat Khuzhin](https://github.com/azat)). +* Fix macOS M1 builds due to sprintf deprecation [#42962](https://github.com/ClickHouse/ClickHouse/pull/42962) ([Jordi Villar](https://github.com/jrdi)). +* Less use of CH-specific bit_cast() [#42968](https://github.com/ClickHouse/ClickHouse/pull/42968) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove some utils [#42972](https://github.com/ClickHouse/ClickHouse/pull/42972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a bug in CAST function parser [#42980](https://github.com/ClickHouse/ClickHouse/pull/42980) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix old bug to remove `refs/head` from ref name [#42981](https://github.com/ClickHouse/ClickHouse/pull/42981) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add debug information to nightly builds [#42997](https://github.com/ClickHouse/ClickHouse/pull/42997) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add some guard rails around aggregation memory management [#42999](https://github.com/ClickHouse/ClickHouse/pull/42999) ([Raúl Marín](https://github.com/Algunenano)). +* Add `on: workflow_call` to debug CI [#43000](https://github.com/ClickHouse/ClickHouse/pull/43000) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer added identifier typo corrections [#43002](https://github.com/ClickHouse/ClickHouse/pull/43002) ([Maksim Kita](https://github.com/kitaisreal)). +* Simple fixes for restart replica description [#43004](https://github.com/ClickHouse/ClickHouse/pull/43004) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup match code [#43006](https://github.com/ClickHouse/ClickHouse/pull/43006) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix TSan errors (correctly ignore _exit interception) [#43009](https://github.com/ClickHouse/ClickHouse/pull/43009) ([Azat Khuzhin](https://github.com/azat)). +* fix bandwidth throttlers initialization order [#43015](https://github.com/ClickHouse/ClickHouse/pull/43015) ([Sergei Trifonov](https://github.com/serxa)). +* Add test for issue [#42520](https://github.com/ClickHouse/ClickHouse/issues/42520) [#43027](https://github.com/ClickHouse/ClickHouse/pull/43027) ([Robert Schulze](https://github.com/rschu1ze)). +* Analyzer improve ARRAY JOIN with JOIN [#43048](https://github.com/ClickHouse/ClickHouse/pull/43048) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix projection part removal with zero-copy replication [#43060](https://github.com/ClickHouse/ClickHouse/pull/43060) ([alesapin](https://github.com/alesapin)). +* Fix msan warning [#43065](https://github.com/ClickHouse/ClickHouse/pull/43065) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer AST key condition crash fix [#43070](https://github.com/ClickHouse/ClickHouse/pull/43070) ([Maksim Kita](https://github.com/kitaisreal)). +* Better logging for mark range filtering on projection parts [#43076](https://github.com/ClickHouse/ClickHouse/pull/43076) ([Duc Canh Le](https://github.com/canhld94)). +* Fix ub type punning [#43088](https://github.com/ClickHouse/ClickHouse/pull/43088) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Analyzer improve aliases support for table expressions [#43089](https://github.com/ClickHouse/ClickHouse/pull/43089) ([Maksim Kita](https://github.com/kitaisreal)). +* Throw not implemented for window frame type 'groups' in analyzer [#43090](https://github.com/ClickHouse/ClickHouse/pull/43090) ([Vladimir C](https://github.com/vdimir)). +* Disable clickhouse local and client non-interactive progress by default. [#43092](https://github.com/ClickHouse/ClickHouse/pull/43092) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make error message after dropping current user more correct. [#43097](https://github.com/ClickHouse/ClickHouse/pull/43097) ([Vitaly Baranov](https://github.com/vitlibar)). +* More stable test [#43102](https://github.com/ClickHouse/ClickHouse/pull/43102) ([alesapin](https://github.com/alesapin)). +* Rewrite tests for memory overcommit [#43105](https://github.com/ClickHouse/ClickHouse/pull/43105) ([Dmitry Novik](https://github.com/novikd)). +* Fix trailing \n from SQLancer status [#43114](https://github.com/ClickHouse/ClickHouse/pull/43114) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix `test_keeper_four_word_command::test_cmd_stat` [#43115](https://github.com/ClickHouse/ClickHouse/pull/43115) ([Antonio Andelic](https://github.com/antonio2368)). +* Enable keeper fault injection for inserts in functional tests [#43117](https://github.com/ClickHouse/ClickHouse/pull/43117) ([Igor Nikonov](https://github.com/devcrafter)). +* Analyzer aggregation crash fix [#43118](https://github.com/ClickHouse/ClickHouse/pull/43118) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer aggregation totals crash fix [#43119](https://github.com/ClickHouse/ClickHouse/pull/43119) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve commit_status_helper.py [#43121](https://github.com/ClickHouse/ClickHouse/pull/43121) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Skip hash logging on sanitizer builds [#43129](https://github.com/ClickHouse/ClickHouse/pull/43129) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer improve JOIN with constants [#43141](https://github.com/ClickHouse/ClickHouse/pull/43141) ([Maksim Kita](https://github.com/kitaisreal)). +* Remove POCO_CLICKHOUSE_PATCH [#43146](https://github.com/ClickHouse/ClickHouse/pull/43146) ([Azat Khuzhin](https://github.com/azat)). +* Update CompressionCodecDeflateQpl.cpp [#43150](https://github.com/ClickHouse/ClickHouse/pull/43150) ([Tiaonmmn](https://github.com/Tiaonmmn)). +* Randomize keeper fault injection settings in stress tests [#43187](https://github.com/ClickHouse/ClickHouse/pull/43187) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix for missing columns bug with projections an ALTER UPDATE [#43189](https://github.com/ClickHouse/ClickHouse/pull/43189) ([Alexander Gololobov](https://github.com/davenger)). +* A workaround for LLVM bug, https://github.com/llvm/llvm-project/issues/58633 [#43195](https://github.com/ClickHouse/ClickHouse/pull/43195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Stop `ConfigReloader` first to avoid data race [#43201](https://github.com/ClickHouse/ClickHouse/pull/43201) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix typo [#43203](https://github.com/ClickHouse/ClickHouse/pull/43203) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Miscellaneous changes [#43206](https://github.com/ClickHouse/ClickHouse/pull/43206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky 02449_check_dependencies_and_table_shutdown [#43212](https://github.com/ClickHouse/ClickHouse/pull/43212) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add test to check [#43167](https://github.com/ClickHouse/ClickHouse/issues/43167) for all builds [#43216](https://github.com/ClickHouse/ClickHouse/pull/43216) ([Ilya Yatsishin](https://github.com/qoega)). +* Don't throw if shared ID already created in `StorageReplicatedMergeTree` [#43244](https://github.com/ClickHouse/ClickHouse/pull/43244) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix nullptr dereference in collectScopeValidIdentifiersForTypoCorrection [#43245](https://github.com/ClickHouse/ClickHouse/pull/43245) ([Vladimir C](https://github.com/vdimir)). +* Better message in wait_zookeeper_to_start [#43256](https://github.com/ClickHouse/ClickHouse/pull/43256) ([Vladimir C](https://github.com/vdimir)). +* Make test_global_overcommit_tracker non-parallel [#43266](https://github.com/ClickHouse/ClickHouse/pull/43266) ([Dmitry Novik](https://github.com/novikd)). +* Rename canonicalRand to randCanonical [#43283](https://github.com/ClickHouse/ClickHouse/pull/43283) ([Nikita Taranov](https://github.com/nickitat)). +* check limits for an AST in select parser fuzzer [#43285](https://github.com/ClickHouse/ClickHouse/pull/43285) ([Sema Checherinda](https://github.com/CheSema)). +* Allow autoremoval of old parts if detach_not_byte_identical_parts enabled [#43287](https://github.com/ClickHouse/ClickHouse/pull/43287) ([filimonov](https://github.com/filimonov)). +* `pmod`: compatibility with Spark, better documentation [#43313](https://github.com/ClickHouse/ClickHouse/pull/43313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v22.8.9.24-lts.md b/docs/changelogs/v22.8.9.24-lts.md new file mode 100644 index 00000000000..e1f4c2bcdf0 --- /dev/null +++ b/docs/changelogs/v22.8.9.24-lts.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.8.9.24-lts (a1b69551d40) FIXME as compared to v22.8.8.3-lts (ac5a6cababc) + +#### Performance Improvement +* Backported in [#43012](https://github.com/ClickHouse/ClickHouse/issues/43012): Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Improvement +* Backported in [#42840](https://github.com/ClickHouse/ClickHouse/issues/42840): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42964](https://github.com/ClickHouse/ClickHouse/issues/42964): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#43040](https://github.com/ClickHouse/ClickHouse/issues/43040): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42720](https://github.com/ClickHouse/ClickHouse/issues/42720): Fixed `Unknown identifier (aggregate-function)` exception which appears when a user tries to calculate WINDOW ORDER BY/PARTITION BY expressions over aggregate functions: ``` CREATE TABLE default.tenk1 ( `unique1` Int32, `unique2` Int32, `ten` Int32 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; SELECT ten, sum(unique1) + sum(unique2) AS res, rank() OVER (ORDER BY sum(unique1) + sum(unique2) ASC) AS rank FROM _complex GROUP BY ten ORDER BY ten ASC; ``` which gives: ``` Code: 47. DB::Exception: Received from localhost:9000. DB::Exception: Unknown identifier: sum(unique1); there are columns: unique1, unique2, ten: While processing sum(unique1) + sum(unique2) ASC. (UNKNOWN_IDENTIFIER) ```. [#39762](https://github.com/ClickHouse/ClickHouse/pull/39762) ([Vladimir Chebotaryov](https://github.com/quickhouse)). +* Backported in [#42748](https://github.com/ClickHouse/ClickHouse/issues/42748): A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#43062](https://github.com/ClickHouse/ClickHouse/issues/43062): Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Do not warn about kvm-clock [#41217](https://github.com/ClickHouse/ClickHouse/pull/41217) ([Sergei Trifonov](https://github.com/serxa)). +* Revert revert 41268 disable s3 parallel write for part moves to disk s3 [#42617](https://github.com/ClickHouse/ClickHouse/pull/42617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index db983ab9c68..484fd265c3d 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -127,6 +127,10 @@ The following settings can be set before query execution or placed into configur - `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. - `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. - `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. @@ -142,6 +146,7 @@ The following settings can be specified in configuration file for given endpoint - `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. - `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional. +- `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional. **Example:** diff --git a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 486c41c2496..21f857510f7 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -28,7 +28,7 @@ Engines: During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently. -- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md/#alter-mutations). +- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - Do not support indexes. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7dfb5a9fed7..7614a09c018 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -537,7 +537,7 @@ TTL time_column TTL time_column + interval ``` -To define `interval`, use [time interval](/docs/en/sql-reference/operators/index.md/#operators-datetime) operators, for example: +To define `interval`, use [time interval](/docs/en/sql-reference/operators/index.md#operators-datetime) operators, for example: ``` sql TTL date_time + INTERVAL 1 MONTH @@ -860,7 +860,7 @@ The number of threads performing background moves of data parts can be changed b In the case of `MergeTree` tables, data is getting to disk in different ways: - As a result of an insert (`INSERT` query). -- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md/#alter-mutations). +- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - When downloading from another replica. - As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). @@ -940,6 +940,10 @@ Optional parameters: - `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. - `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. S3 disk can be configured as `main` or `cold` storage: ``` xml diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 67b595d0fa0..ead1a76992e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -20,7 +20,7 @@ Replication works at the level of an individual table, not the entire server. A Replication does not depend on sharding. Each shard has its own independent replication. -Compressed data for `INSERT` and `ALTER` queries is replicated (for more information, see the documentation for [ALTER](/docs/en/sql-reference/statements/alter/index.md/#query_language_queries_alter)). +Compressed data for `INSERT` and `ALTER` queries is replicated (for more information, see the documentation for [ALTER](/docs/en/sql-reference/statements/alter/index.md#query_language_queries_alter)). `CREATE`, `DROP`, `ATTACH`, `DETACH` and `RENAME` queries are executed on a single server and are not replicated: diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index 0e51a8b7696..a49214bd00a 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -59,7 +59,7 @@ Main use-cases for `Join`-engine tables are following: ### Deleting Data {#deleting-data} -`ALTER DELETE` queries for `Join`-engine tables are implemented as [mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations). `DELETE` mutation reads filtered data and overwrites data of memory and disk. +`ALTER DELETE` queries for `Join`-engine tables are implemented as [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations). `DELETE` mutation reads filtered data and overwrites data of memory and disk. ### Limitations and Settings {#join-limitations-and-settings} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 56708def497..3221b1a06fa 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -13,7 +13,7 @@ The supported formats are: | Format | Input | Output | |-------------------------------------------------------------------------------------------|------|--------| | [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | | [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | | [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | @@ -48,6 +48,7 @@ The supported formats are: | [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | | [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | | [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | +| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | | [TSKV](#tskv) | ✔ | ✔ | | [Pretty](#pretty) | ✗ | ✔ | | [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | @@ -1210,6 +1211,69 @@ SELECT * FROM json_each_row_nested - [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. - [output_format_json_validate_utf8](../operations/settings/settings.md#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. +## BSONEachRow {#bsoneachrow} + +In this format, ClickHouse formats/parses data as a sequence of BSON documents without any separator between them. +Each row is formatted as a single document and each column is formatted as a single BSON document field with column name as a key. + +For output it uses the following correspondence between ClickHouse types and BSON types: + +| ClickHouse type | BSON Type | +|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------| +| [Bool](../sql-reference/data-types/boolean.md) | `\x08` boolean | +| [Int8/UInt8](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int16UInt16](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int32](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [UInt32](../sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Int64/UInt64](../sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Float32/Float64](../sql-reference/data-types/float.md) | `\x01` double | +| [Date](../sql-reference/data-types/date.md)/[Date32](../sql-reference/data-types/date32.md) | `\x10` int32 | +| [DateTime](../sql-reference/data-types/datetime.md) | `\x12` int64 | +| [DateTime64](../sql-reference/data-types/datetime64.md) | `\x09` datetime | +| [Decimal32](../sql-reference/data-types/decimal.md) | `\x10` int32 | +| [Decimal64](../sql-reference/data-types/decimal.md) | `\x12` int64 | +| [Decimal128](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Decimal256](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [Int128/UInt128](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Int256/UInt256](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | +| [UUID](../sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | +| [Array](../sql-reference/data-types/array.md) | `\x04` array | +| [Tuple](../sql-reference/data-types/tuple.md) | `\x04` array | +| [Named Tuple](../sql-reference/data-types/tuple.md) | `\x03` document | +| [Map](../sql-reference/data-types/map.md) (with String keys) | `\x03` document | + +For input it uses the following correspondence between BSON types and ClickHouse types: + +| BSON Type | ClickHouse Type | +|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `\x01` double | [Float32/Float64](../sql-reference/data-types/float.md) | +| `\x02` string | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](../sql-reference/data-types/map.md)/[Named Tuple](../sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](../sql-reference/data-types/array.md)/[Tuple](../sql-reference/data-types/tuple.md) | +| `\x05` binary, `\x00` binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x02` old binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | +| `\x07` ObjectId | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x08` boolean | [Bool](../sql-reference/data-types/boolean.md) | +| `\x09` datetime | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `\x0A` null value | [NULL](../sql-reference/data-types/nullable.md) | +| `\x0D` JavaScript code | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x0E` symbol | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x10` int32 | [Int32/UInt32](../sql-reference/data-types/int-uint.md)/[Decimal32](../sql-reference/data-types/decimal.md) | +| `\x12` int64 | [Int64/UInt64](../sql-reference/data-types/int-uint.md)/[Decimal64](../sql-reference/data-types/decimal.md)/[DateTime64](../sql-reference/data-types/datetime64.md) | + +Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). +Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value. + +Note: this format don't work properly on Big-Endian platforms. + +### BSON format settings {#bson-format-settings} + +- [output_format_bson_string_as_string](../operations/settings/settings.md#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. +- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. + ## Native {#native} The most efficient format. Data is written and read by blocks in binary format. For each block, the number of rows, number of columns, column names and types, and parts of columns in this block are recorded one after another. In other words, this format is “columnar” – it does not convert columns to rows. This is the format used in the native interface for interaction between servers, for using the command-line client, and for C++ clients. diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 0324f742988..3f11cc3cf7b 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -57,7 +57,7 @@ Internal coordination settings are located in the `..` section and contain servers description. @@ -126,7 +126,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif`. +The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`. You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -328,6 +328,12 @@ target_committed_log_idx 101 last_snapshot_idx 50 ``` +- `rqld`: Request to become new leader. Return `Sent leadership request to leader.` if request sent or `Failed to send leadership request to leader.` if request not sent. Note that if node is already leader the outcome is same as the request is sent. + +``` +Sent leadership request to leader. +``` + ## Migration from ZooKeeper {#migration-from-zookeeper} Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration: diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 8603257ea55..eee4058c230 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -26,7 +26,7 @@ Ways to configure settings, in order of priority: - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). - - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed. + - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed. Settings that can only be made in the server config file are not covered in this section. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7abe4affbd1..7494f3db71a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -276,7 +276,7 @@ Default value: 0. Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md/#create-default-values) instead of [NULL](../../sql-reference/syntax.md/#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable) data type. If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. -This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause. +This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#inserting-the-results-of-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause. Possible values: @@ -1619,8 +1619,8 @@ These functions can be transformed: - [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. - [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. - [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. -- [isNull](../../sql-reference/operators/index.md/#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. -- [isNotNull](../../sql-reference/operators/index.md/#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. - [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. - [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. - [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. @@ -2041,7 +2041,7 @@ Default value: 16. ## validate_polygons {#validate_polygons} -Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md/#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. +Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. Possible values: @@ -2227,7 +2227,7 @@ Default value: `0`. ## mutations_sync {#mutations_sync} -Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md/#mutations)) synchronously. +Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously. Possible values: @@ -2239,8 +2239,8 @@ Default value: `0`. **See Also** -- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) -- [Mutations](../../sql-reference/statements/alter/index.md/#mutations) +- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [Mutations](../../sql-reference/statements/alter/index.md#mutations) ## ttl_only_drop_parts {#ttl_only_drop_parts} @@ -4784,7 +4784,7 @@ Possible values: Default value: 1. -## SQLInsert format settings {$sqlinsert-format-settings} +## SQLInsert format settings {#sqlinsert-format-settings} ### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size} @@ -4815,3 +4815,17 @@ Default value: `false`. Quote column names with "`" characters Default value: `true`. + +## BSONEachRow format settings {#bson-each-row-format-settings} + +### output_format_bson_string_as_string {#output_format_bson_string_as_string} + +Use BSON String type instead of Binary for String columns. + +Disabled by default. + +### input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types while schema inference for format BSONEachRow. + +Disabled by default. diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index 0d3b764846b..d8fb91a63f5 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/mutations --- # mutations -The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. +The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. Columns: @@ -45,7 +45,7 @@ If there were problems with mutating some data parts, the following columns cont **See Also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) - [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine - [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index cbabd9b27b1..bbd5385f44b 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -9,7 +9,7 @@ Each row describes one data part. Columns: -- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md/#query_language_queries_alter) query. +- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. Formats: diff --git a/docs/en/operations/system-tables/parts_columns.md b/docs/en/operations/system-tables/parts_columns.md index d934e01f245..68757ddfbff 100644 --- a/docs/en/operations/system-tables/parts_columns.md +++ b/docs/en/operations/system-tables/parts_columns.md @@ -9,7 +9,7 @@ Each row describes one data part. Columns: -- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md/#query_language_queries_alter) query. +- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. Formats: diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 827a7e33ea3..0201462c0b6 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -189,10 +189,12 @@ preAllocSize=131072 # especially if there are a lot of clients. To prevent ZooKeeper from running # out of memory due to queued requests, ZooKeeper will throttle clients so that # there is no more than globalOutstandingLimit outstanding requests in the -# system. The default limit is 1,000.ZooKeeper logs transactions to a -# transaction log. After snapCount transactions are written to a log file a -# snapshot is started and a new transaction log file is started. The default -# snapCount is 10,000. +# system. The default limit is 1000. +# globalOutstandingLimit=1000 + +# ZooKeeper logs transactions to a transaction log. After snapCount transactions +# are written to a log file a snapshot is started and a new transaction log file +# is started. The default snapCount is 100000. snapCount=3000000 # If this option is defined, requests will be will logged to a trace file named diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index c8c7470d2cb..db41fdf2fc3 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -6,7 +6,7 @@ sidebar_label: Date32 # Date32 -A date. Supports the date range same with [DateTime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1900-01-01. Allows storing values till 2299-12-31. +A date. Supports the date range same with [DateTime64](../../sql-reference/data-types/datetime64.md). Stored as a signed 32-bit integer in native byte order with the value representing the days since 1970-01-01 (0 represents 1970-01-01 and negative values represent the days before 1970). **Examples** diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 06b5b8a6746..9f922a2cccb 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -7,7 +7,9 @@ import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dict # Dictionaries - +:::tip Tutorial +If you are getting started with Dictionaries in ClickHouse we have a tutorial that covers that topic. Take a look [here](/docs/en/tutorial.md). +::: You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. @@ -27,6 +29,8 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl - Configuration parameters. - Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. + + ## Creating a dictionary with a DDL query Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md), and this is the recommended method because with DDL created dictionaries: diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 4a6e46e1759..cccc02c2553 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -185,7 +185,7 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 430762a1885..bcd118ce0be 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -549,3 +549,33 @@ Result: │ 3.141592653589793 │ └───────────────────┘ ``` + + +## factorial(n) + +Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. + +The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater will cause exception throw. + + +**Syntax** + +``` sql +factorial(n) +``` + +**Example** + +Query: + +``` sql +SELECT factorial(10); +``` + +Result: + +``` text +┌─factorial(10)─┐ +│ 3628800 │ +└───────────────┘ +``` diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 08f2620a009..4efa2131eb6 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -24,7 +24,7 @@ Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type Uses a linear congruential generator. -## canonicalRand +## randCanonical The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). Non-deterministic. Return type is Float64. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 6bca0dbff42..ae8671ffa9d 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -254,7 +254,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#insert_query_insert-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 4dcab030d13..30ed96c0b9c 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -10,7 +10,7 @@ sidebar_label: DELETE ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr ``` -Deletes data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Deletes data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). :::note @@ -25,6 +25,6 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 146c15e776e..a8cea63380c 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -270,7 +270,7 @@ ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd' ## UPDATE IN PARTITION -Manipulates data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Manipulates data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). Syntax: @@ -290,7 +290,7 @@ ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2; ## DELETE IN PARTITION -Deletes data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Deletes data in the specifies partition matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). Syntax: diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index 99cb8fb8fd1..908d28d7ab1 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -11,6 +11,14 @@ Projections store data in a format that optimizes query execution, this feature You can define one or more projections for a table, and during the query analysis the projection with the least data to scan will be selected by ClickHouse without modifying the query provided by the user. +:::note Disk usage + +Projections will create internally a new hidden table, this means that more IO and space on disk will be required. +Example, If the projection has defined a different primary key, all the data from the original table will be duplicated. +::: + +You can see more technical details about how projections work internally on this [page](/docs/en/guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-multiple.md/#option-3-projections). + ## Example filtering without using primary keys Creating the table: @@ -138,15 +146,15 @@ The following operations with [projections](/docs/en/engines/table-engines/merge ## DROP PROJECTION -`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). ## MATERIALIZE PROJECTION -`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). ## CLEAR PROJECTION -`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files. diff --git a/docs/en/sql-reference/statements/alter/skipping-index.md b/docs/en/sql-reference/statements/alter/skipping-index.md index 2dadffc4527..037e4bc38c5 100644 --- a/docs/en/sql-reference/statements/alter/skipping-index.md +++ b/docs/en/sql-reference/statements/alter/skipping-index.md @@ -14,7 +14,7 @@ The following operations are available: - `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. -- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. +- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. The first two commands are lightweight in a sense that they only change metadata or remove files. diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index f40b72f7ab3..5d27c382982 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -10,7 +10,7 @@ sidebar_label: UPDATE ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr ``` -Manipulates data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md/#mutations). +Manipulates data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). :::note The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. @@ -24,7 +24,7 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md/#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md/#synchronicity-of-alter-queries) +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 5833c43f55d..85741117d2a 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -60,7 +60,7 @@ If you specify `POPULATE`, the existing table data is inserted into the view whe A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data won’t be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`. -The execution of [ALTER](../../../sql-reference/statements/alter/view.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. +The execution of [ALTER](/docs/en/sql-reference/statements/alter/view.md) queries on materialized views has limitations, for example, you can not update the `SELECT` query, so this might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view. diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index f4a6ccb0c7d..c5995e067d1 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -47,6 +47,7 @@ Union - `AST` — Abstract syntax tree. - `SYNTAX` — Query text after AST-level optimizations. +- `QUERY TREE` — Query tree after Query Tree level optimizations. - `PLAN` — Query execution plan. - `PIPELINE` — Query execution pipeline. @@ -110,6 +111,32 @@ FROM CROSS JOIN system.numbers AS c ``` +### EXPLAIN QUERY TREE + +Settings: + +- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`. +- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`. +- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`. + +Example: +```sql +EXPLAIN QUERY TREE SELECT id, value FROM test_table; +``` + +``` +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + value String + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: value, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test_table +``` + ### EXPLAIN PLAN Dump query plan steps. diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index ac02e9ab5a1..2df8581c447 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -243,6 +243,54 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../../../sql-reference/statements/select/join.md) clause (in this case, the respective total values are combined). +## GROUP BY ALL + +`GROUP BY ALL` is equivalent to listing all the SELECT-ed expressions that are not aggregate functions. + +For example: + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY ALL +``` + +is the same as + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY a * 2, b +``` + +For a special case that if there is a function having both aggregate functions and other fields as its arguments, the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it. + +For example: + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY ALL +``` + +is the same as + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY substring(a, 4, 2), substring(a, 1, 2) +``` + ## Examples Example: diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md new file mode 100644 index 00000000000..4d1488ea640 --- /dev/null +++ b/docs/en/sql-reference/table-functions/format.md @@ -0,0 +1,75 @@ +--- +slug: /en/sql-reference/table-functions/format +sidebar_position: 56 +sidebar_label: format +--- + +# format + +Extracts table structure from data and parses it according to specified input format. + +**Syntax** + +``` sql +format(format_name, data) +``` + +**Parameters** + +- `format_name` — The [format](../../interfaces/formats.md#formats) of the data. +- `data` — String literal or constant expression that returns a string containing data in specified format + +**Returned value** + +A table with data parsed from `data` argument according specified format and extracted schema. + +**Examples** + +**Query:** +``` sql +:) select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌───b─┬─a─────┐ +│ 111 │ Hello │ +│ 123 │ World │ +│ 112 │ Hello │ +│ 124 │ World │ +└─────┴───────┘ +``` + +**Query:** +```sql + +:) desc format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ b │ Nullable(Float64) │ │ │ │ │ │ +│ a │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +**See Also** + +- [Formats](../../interfaces/formats.md) + +[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/format) diff --git a/docs/ru/operations/external-authenticators/kerberos.md b/docs/ru/operations/external-authenticators/kerberos.md index 7b0702b2132..865ea639c89 100644 --- a/docs/ru/operations/external-authenticators/kerberos.md +++ b/docs/ru/operations/external-authenticators/kerberos.md @@ -98,7 +98,7 @@ ClickHouse предоставляет возможность аутентифи :::danger "Важно" - Если пользователь настроен для Kerberos-аутентификации, другие виды уатентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу. + Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу. :::info "" Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`. diff --git a/docs/ru/operations/settings/index.md b/docs/ru/operations/settings/index.md index 4e055405847..6806aea5135 100644 --- a/docs/ru/operations/settings/index.md +++ b/docs/ru/operations/settings/index.md @@ -24,7 +24,7 @@ slug: /ru/operations/settings/ - При запуске консольного клиента ClickHouse в не интерактивном режиме установите параметр запуска `--setting=value`. - При использовании HTTP API передавайте cgi-параметры (`URL?setting_1=value&setting_2=value...`). - - Укажите необходимые настройки в секции [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) запроса SELECT. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. + - Укажите необходимые настройки в секции [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) запроса SELECT. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. Настройки, которые можно задать только в конфигурационном файле сервера, в разделе не рассматриваются. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a070dbd5e10..58894611386 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -479,7 +479,7 @@ SELECT * FROM table_with_enum_column_for_tsv_insert; Включает или отключает вставку [значений по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) вместо [NULL](../../sql-reference/syntax.md#null-literal) в столбцы, которые не позволяют [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable). Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки. -Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`. +Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`. Возможные значения: diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 11ec72596c4..a8ace213075 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`). -Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). +Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 573b8d39926..4fa6ac4ce66 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -95,7 +95,7 @@ INSERT INTO t FORMAT TabSeparated Если в таблице объявлены [ограничения](../../sql-reference/statements/create/table.md#constraints), то их выполнимость будет проверена для каждой вставляемой строки. Если для хотя бы одной строки ограничения не будут выполнены, запрос будет остановлен. -### Вставка результатов `SELECT` {#insert_query_insert-select} +### Вставка результатов `SELECT` {#inserting-the-results-of-select} **Синтаксис** diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index 4479e24000b..f360a09eb10 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -270,7 +270,7 @@ SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers; └─────────────────┴────────┘ ``` -## SETTINGS в запросе SELECT {#settings-in-select} +## SETTINGS в запросе SELECT {#settings-in-select-query} Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md new file mode 120000 index 00000000000..cc5e3a5a142 --- /dev/null +++ b/docs/ru/sql-reference/table-functions/format.md @@ -0,0 +1 @@ +../../../en/sql-reference/table-functions/format.md \ No newline at end of file diff --git a/docs/zh/engines/table-engines/integrations/kafka.md b/docs/zh/engines/table-engines/integrations/kafka.md index 707ee962ace..c6f11d9efce 100644 --- a/docs/zh/engines/table-engines/integrations/kafka.md +++ b/docs/zh/engines/table-engines/integrations/kafka.md @@ -74,7 +74,7 @@ Kafka 特性: 消费的消息会被自动追踪,因此每个消息在不同的消费组里只会记录一次。如果希望获得两次数据,则使用另一个组名创建副本。 -消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 http://kafka.apache.org/intro。 +消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 [http://kafka.apache.org/intro](http://kafka.apache.org/intro)。 `SELECT` 查询对于读取消息并不是很有用(调试除外),因为每条消息只能被读取一次。使用物化视图创建实时线程更实用。您可以这样做: diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 13b4c368a96..7e847c02dcc 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -164,7 +164,7 @@ SETTINGS index_granularity = 8192, index_granularity_bytes = 0;
  • index_granularity: 显式设置为其默认值8192。这意味着对于每一组8192行,主索引将有一个索引条目,例如,如果表包含16384行,那么索引将有两个索引条目。

  • -
  • index_granularity_bytes: 设置为0表示禁止字适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目 +
  • index_granularity_bytes: 设置为0表示禁止自适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目
    • 如果n小于8192,但n行的合并行数据大小大于或等于10MB (index_granularity_bytes的默认值)或
    • n达到8192
    • diff --git a/docs/zh/sql-reference/functions/encoding-functions.md b/docs/zh/sql-reference/functions/encoding-functions.md index 5e9260ff94a..846d6c58f40 100644 --- a/docs/zh/sql-reference/functions/encoding-functions.md +++ b/docs/zh/sql-reference/functions/encoding-functions.md @@ -181,7 +181,7 @@ unhex(arg) **参数** -- `arg` — 包含任意数量的十六进制数字的字符串。类型为:[String](../../sql-reference/data-types/string.md)。 +- `arg` — 包含任意数量的十六进制数字的字符串。类型为:[String](../../sql-reference/data-types/string.md),[FixedString](../../sql-reference/data-types/fixedstring.md)。 支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数,则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。对于数字参数, unhex()不执行 hex(N) 的倒数。 diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 23edfd633db..fd73be4fd93 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) 不支持对primary key或者sampling key中的列(在 `ENGINE` 表达式中用到的列)进行删除操作。改变包含在primary key中的列的类型时,如果操作不会导致数据的变化(例如,往Enum中添加一个值,或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。 -如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 +如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 `ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index 69762bf43bc..f199329829c 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -90,7 +90,7 @@ INSERT INTO t FORMAT TabSeparated 如果表中有一些[限制](../../sql-reference/statements/create/table.mdx#constraints),,数据插入时会逐行进行数据校验,如果这里面包含了不符合限制条件的数据,服务将会抛出包含限制信息的异常,这个语句也会被停止执行。 -### 使用`SELECT`的结果写入 {#insert_query_insert-select} +### 使用`SELECT`的结果写入 {#inserting-the-results-of-select} ``` sql INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... diff --git a/docs/zh/sql-reference/statements/select/group-by.md b/docs/zh/sql-reference/statements/select/group-by.md index 90b3c7660ee..31c1649bc30 100644 --- a/docs/zh/sql-reference/statements/select/group-by.md +++ b/docs/zh/sql-reference/statements/select/group-by.md @@ -77,6 +77,54 @@ sidebar_label: GROUP BY 您可以使用 `WITH TOTALS` 在子查询中,包括在子查询 [JOIN](../../../sql-reference/statements/select/join.md) 子句(在这种情况下,将各自的总值合并)。 +## GROUP BY ALL {#group-by-all} + +`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的字段进行`GROUP BY`。 + +例如 + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY ALL +``` + +效果等同于 + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY a * 2, b +``` + +对于一种特殊情况,如果一个 function 的参数中同时有聚合函数和其他字段,会对参数中能提取的最大非聚合字段进行`GROUP BY`。 + +例如: + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY ALL +``` + +效果等同于 + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY substring(a, 4, 2), substring(a, 1, 2) +``` + ## 例子 {#examples} 示例: diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md new file mode 120000 index 00000000000..cc5e3a5a142 --- /dev/null +++ b/docs/zh/sql-reference/table-functions/format.md @@ -0,0 +1 @@ +../../../en/sql-reference/table-functions/format.md \ No newline at end of file diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 028b4fbf8ab..1581b95213e 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -9,7 +9,10 @@ After=time-sync.target network-online.target Wants=time-sync.target [Service] -Type=simple +Type=notify + +# Switching off watchdog is very important for sd_notify to work correctly. +Environment=CLICKHOUSE_WATCHDOG_ENABLE=0 User=clickhouse Group=clickhouse Restart=always diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index a0d5344236e..bd505b319bb 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -160,7 +160,7 @@ void ClusterCopierApp::mainImpl() registerTableFunctions(); registerStorages(); registerDictionaries(); - registerDisks(); + registerDisks(/* global_skip_access_check= */ true); registerFormats(); static const std::string default_database = "_local"; diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp index 5b09a9c99a7..65eaf8b7108 100644 --- a/programs/copier/TaskTable.cpp +++ b/programs/copier/TaskTable.cpp @@ -45,7 +45,7 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati engine_push_str = config.getString(table_prefix + "engine", "rand()"); { - ParserStorage parser_storage; + ParserStorage parser_storage{ParserStorage::TABLE_ENGINE}; engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); engine_push_partition_key_ast = extractPartitionKey(engine_push_ast); primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", "); diff --git a/programs/diagnostics/go.mod b/programs/diagnostics/go.mod index ca85773ff7b..0a8bd007627 100644 --- a/programs/diagnostics/go.mod +++ b/programs/diagnostics/go.mod @@ -80,8 +80,8 @@ require ( go.opentelemetry.io/otel v1.4.1 // indirect go.opentelemetry.io/otel/trace v1.4.1 // indirect golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 // indirect - golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect - golang.org/x/text v0.3.7 // indirect + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect + golang.org/x/text v0.3.8 // indirect google.golang.org/genproto v0.0.0-20220617124728-180714bec0ad // indirect google.golang.org/grpc v1.47.0 // indirect google.golang.org/protobuf v1.28.0 // indirect diff --git a/programs/diagnostics/go.sum b/programs/diagnostics/go.sum index 0c90176eca9..1ee2e09f81c 100644 --- a/programs/diagnostics/go.sum +++ b/programs/diagnostics/go.sum @@ -1195,8 +1195,8 @@ golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1206,8 +1206,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 91472a8df33..ea46bb1ba8d 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -176,7 +176,7 @@ int DisksApp::main(const std::vector & /*args*/) Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level)); } - registerDisks(); + registerDisks(/* global_skip_access_check= */ true); registerFormats(); shared_context = Context::createShared(); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a1bf324f482..8f65141b533 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -262,6 +262,7 @@ void Keeper::defineOptions(Poco::Util::OptionSet & options) } int Keeper::main(const std::vector & /*args*/) +try { Poco::Logger * log = &logger(); @@ -473,6 +474,12 @@ int Keeper::main(const std::vector & /*args*/) return Application::EXIT_OK; } +catch (...) +{ + /// Poco does not provide stacktrace. + tryLogCurrentException("Application"); + throw; +} void Keeper::logRevision() const diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 4c07fa0a02d..ce7e27026f1 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -413,7 +413,7 @@ try registerTableFunctions(); registerStorages(); registerDictionaries(); - registerDisks(); + registerDisks(/* global_skip_access_check= */ true); registerFormats(); processConfig(); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a5321997779..4702985c0ae 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -99,6 +99,10 @@ #include "config_version.h" #if defined(OS_LINUX) +# include +# include +# include +# include # include # include # include @@ -273,6 +277,7 @@ namespace ErrorCodes extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; + extern const int SYSTEM_ERROR; } @@ -646,7 +651,53 @@ static void sanityChecks(Server & server) } } +#if defined(OS_LINUX) +/// Sends notification to systemd, analogous to sd_notify from libsystemd +static void systemdNotify(const std::string_view & command) +{ + const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe) + + if (path == nullptr) + return; /// not using systemd + + int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); + + if (s == -1) + throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR); + + SCOPE_EXIT({ close(s); }); + + const size_t len = strlen(path); + + struct sockaddr_un addr; + + addr.sun_family = AF_UNIX; + + if (len < 2 || len > sizeof(addr.sun_path) - 1) + throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path); + + memcpy(addr.sun_path, path, len + 1); /// write last zero as well. + + size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len; + + /// '@' meass this is Linux abstract socket, per documentation it must be sun_path[0] must be set to '\0' for it. + if (path[0] == '@') + addr.sun_path[0] = 0; + else if (path[0] == '/') + addrlen += 1; /// non-abstract-addresses should be zero terminated. + else + throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path); + + const struct sockaddr *sock_addr = reinterpret_cast (&addr); + + if (sendto(s, command.data(), command.size(), 0, sock_addr, static_cast (addrlen)) != static_cast (command.size())) + throw Exception("Failed to notify systemd.", ErrorCodes::SYSTEM_ERROR); + +} +#endif + int Server::main(const std::vector & /*args*/) +try { Poco::Logger * log = &logger(); @@ -679,7 +730,7 @@ int Server::main(const std::vector & /*args*/) registerTableFunctions(); registerStorages(); registerDictionaries(); - registerDisks(); + registerDisks(/* global_skip_access_check= */ false); registerFormats(); registerRemoteFileMetadatas(); @@ -1148,6 +1199,9 @@ int Server::main(const std::vector & /*args*/) total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + bool allow_use_jemalloc_memory = config->getBool("allow_use_jemalloc_memory", true); + total_memory_tracker.setAllowUseJemallocMemory(allow_use_jemalloc_memory); + auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); total_memory_tracker.setOvercommitTracker(global_overcommit_tracker); @@ -1776,6 +1830,10 @@ int Server::main(const std::vector & /*args*/) tryLogCurrentException(log, "Caught exception while starting cluster discovery"); } +#if defined(OS_LINUX) + systemdNotify("READY=1\n"); +#endif + SCOPE_EXIT_SAFE({ LOG_DEBUG(log, "Received termination signal."); @@ -1845,6 +1903,12 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } +catch (...) +{ + /// Poco does not provide stacktrace. + tryLogCurrentException("Application"); + throw; +} std::unique_ptr Server::buildProtocolStackFromConfig( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 8263f50d1b0..ed87b13f01a 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -130,8 +130,8 @@ enum class AccessType M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", TABLE, SHOW_ACCESS) \ M(SHOW_QUOTAS, "SHOW CREATE QUOTA", GLOBAL, SHOW_ACCESS) \ M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \ - M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", GLOBAL, SHOW_ACCESS) \ M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \ + M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", GROUP, ACCESS_MANAGEMENT) \ M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 8da16b86f4e..e1c598f26f5 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -2,6 +2,8 @@ #include #include #include +#include + #include #include @@ -73,6 +75,7 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf const bool has_tls_ca_cert_file = config.has(ldap_server_config + ".tls_ca_cert_file"); const bool has_tls_ca_cert_dir = config.has(ldap_server_config + ".tls_ca_cert_dir"); const bool has_tls_cipher_suite = config.has(ldap_server_config + ".tls_cipher_suite"); + const bool has_search_limit = config.has(ldap_server_config + ".search_limit"); if (!has_host) throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS); @@ -91,8 +94,8 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf } else if (has_auth_dn_prefix || has_auth_dn_suffix) { - const auto auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); - const auto auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); + std::string auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); + std::string auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); params.bind_dn = auth_dn_prefix + "{user_name}" + auth_dn_suffix; } @@ -176,14 +179,17 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf if (has_port) { - const auto port = config.getInt64(ldap_server_config + ".port"); - if (port < 0 || port > 65535) + UInt32 port = config.getUInt(ldap_server_config + ".port"); + if (port > 65535) throw Exception("Bad value for 'port' entry", ErrorCodes::BAD_ARGUMENTS); params.port = port; } else params.port = (params.enable_tls == LDAPClient::Params::TLSEnable::YES ? 636 : 389); + + if (has_search_limit) + params.search_limit = static_cast(config.getUInt64(ldap_server_config + ".search_limit")); } void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::AbstractConfiguration & config) @@ -313,11 +319,26 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur } } +UInt128 computeParamsHash(const LDAPClient::Params & params, const LDAPClient::RoleSearchParamsList * role_search_params) +{ + SipHash hash; + params.updateHash(hash); + if (role_search_params) + { + for (const auto & params_instance : *role_search_params) + { + params_instance.updateHash(hash); + } + } + + return hash.get128(); +} + bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const BasicCredentials & credentials, const LDAPClient::RoleSearchParamsList * role_search_params, LDAPClient::SearchResultsList * role_search_results) const { std::optional params; - std::size_t params_hash = 0; + UInt128 params_hash = 0; { std::scoped_lock lock(mutex); @@ -331,14 +352,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B params->user = credentials.getUserName(); params->password = credentials.getPassword(); - params->combineCoreHash(params_hash); - if (role_search_params) - { - for (const auto & params_instance : *role_search_params) - { - params_instance.combineHash(params_hash); - } - } + params_hash = computeParamsHash(*params, role_search_params); // Check the cache, but only if the caching is enabled at all. if (params->verification_cooldown > std::chrono::seconds{0}) @@ -408,15 +422,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B new_params.user = credentials.getUserName(); new_params.password = credentials.getPassword(); - std::size_t new_params_hash = 0; - new_params.combineCoreHash(new_params_hash); - if (role_search_params) - { - for (const auto & params_instance : *role_search_params) - { - params_instance.combineHash(new_params_hash); - } - } + const UInt128 new_params_hash = computeParamsHash(new_params, role_search_params); // If the critical server params have changed while we were checking the password, we discard the current result. if (params_hash != new_params_hash) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 2affbc293ec..856ffd18a32 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -2,10 +2,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -15,6 +15,22 @@ #include +namespace +{ + +template >>> +void updateHash(SipHash & hash, const T & value) +{ + hash.update(value); +} + +void updateHash(SipHash & hash, const std::string & value) +{ + hash.update(value.size()); + hash.update(value); +} + +} namespace DB { @@ -26,30 +42,30 @@ namespace ErrorCodes extern const int LDAP_ERROR; } -void LDAPClient::SearchParams::combineHash(std::size_t & seed) const +void LDAPClient::SearchParams::updateHash(SipHash & hash) const { - boost::hash_combine(seed, base_dn); - boost::hash_combine(seed, static_cast(scope)); - boost::hash_combine(seed, search_filter); - boost::hash_combine(seed, attribute); + ::updateHash(hash, base_dn); + ::updateHash(hash, static_cast(scope)); + ::updateHash(hash, search_filter); + ::updateHash(hash, attribute); } -void LDAPClient::RoleSearchParams::combineHash(std::size_t & seed) const +void LDAPClient::RoleSearchParams::updateHash(SipHash & hash) const { - SearchParams::combineHash(seed); - boost::hash_combine(seed, prefix); + SearchParams::updateHash(hash); + ::updateHash(hash, prefix); } -void LDAPClient::Params::combineCoreHash(std::size_t & seed) const +void LDAPClient::Params::updateHash(SipHash & hash) const { - boost::hash_combine(seed, host); - boost::hash_combine(seed, port); - boost::hash_combine(seed, bind_dn); - boost::hash_combine(seed, user); - boost::hash_combine(seed, password); + ::updateHash(hash, host); + ::updateHash(hash, port); + ::updateHash(hash, bind_dn); + ::updateHash(hash, user); + ::updateHash(hash, password); if (user_dn_detection) - user_dn_detection->combineHash(seed); + user_dn_detection->updateHash(hash); } LDAPClient::LDAPClient(const Params & params_) @@ -153,13 +169,13 @@ namespace } -void LDAPClient::diag(int rc, String text) +void LDAPClient::handleError(int result_code, String text) { std::scoped_lock lock(ldap_global_mutex); - if (rc != LDAP_SUCCESS) + if (result_code != LDAP_SUCCESS) { - const char * raw_err_str = ldap_err2string(rc); + const char * raw_err_str = ldap_err2string(result_code); if (raw_err_str && *raw_err_str != '\0') { if (!text.empty()) @@ -214,7 +230,7 @@ bool LDAPClient::openConnection() SCOPE_EXIT({ ldap_memfree(uri); }); - diag(ldap_initialize(&handle, uri)); + handleError(ldap_initialize(&handle, uri)); if (!handle) throw Exception("ldap_initialize() failed", ErrorCodes::LDAP_ERROR); } @@ -226,13 +242,13 @@ bool LDAPClient::openConnection() case LDAPClient::Params::ProtocolVersion::V2: value = LDAP_VERSION2; break; case LDAPClient::Params::ProtocolVersion::V3: value = LDAP_VERSION3; break; } - diag(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value)); + handleError(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value)); } - diag(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON)); + handleError(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON)); #ifdef LDAP_OPT_KEEPCONN - diag(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON)); + handleError(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON)); #endif #ifdef LDAP_OPT_TIMEOUT @@ -240,7 +256,7 @@ bool LDAPClient::openConnection() ::timeval operation_timeout; operation_timeout.tv_sec = params.operation_timeout.count(); operation_timeout.tv_usec = 0; - diag(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout)); + handleError(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout)); } #endif @@ -249,18 +265,18 @@ bool LDAPClient::openConnection() ::timeval network_timeout; network_timeout.tv_sec = params.network_timeout.count(); network_timeout.tv_usec = 0; - diag(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout)); + handleError(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout)); } #endif { const int search_timeout = static_cast(params.search_timeout.count()); - diag(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout)); + handleError(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout)); } { - const int size_limit = params.search_limit; - diag(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit)); + const int size_limit = static_cast(params.search_limit); + handleError(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit)); } #ifdef LDAP_OPT_X_TLS_PROTOCOL_MIN @@ -274,7 +290,7 @@ bool LDAPClient::openConnection() case LDAPClient::Params::TLSProtocolVersion::TLS1_1: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_1; break; case LDAPClient::Params::TLSProtocolVersion::TLS1_2: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_2; break; } - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value)); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value)); } #endif @@ -288,44 +304,44 @@ bool LDAPClient::openConnection() case LDAPClient::Params::TLSRequireCert::TRY: value = LDAP_OPT_X_TLS_TRY; break; case LDAPClient::Params::TLSRequireCert::DEMAND: value = LDAP_OPT_X_TLS_DEMAND; break; } - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value)); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value)); } #endif #ifdef LDAP_OPT_X_TLS_CERTFILE if (!params.tls_cert_file.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str())); #endif #ifdef LDAP_OPT_X_TLS_KEYFILE if (!params.tls_key_file.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str())); #endif #ifdef LDAP_OPT_X_TLS_CACERTFILE if (!params.tls_ca_cert_file.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str())); #endif #ifdef LDAP_OPT_X_TLS_CACERTDIR if (!params.tls_ca_cert_dir.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str())); #endif #ifdef LDAP_OPT_X_TLS_CIPHER_SUITE if (!params.tls_cipher_suite.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str())); #endif #ifdef LDAP_OPT_X_TLS_NEWCTX { const int i_am_a_server = 0; - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server)); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server)); } #endif if (params.enable_tls == LDAPClient::Params::TLSEnable::YES_STARTTLS) - diag(ldap_start_tls_s(handle, nullptr, nullptr)); + handleError(ldap_start_tls_s(handle, nullptr, nullptr)); final_user_name = escapeForDN(params.user); final_bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", final_user_name} }); @@ -346,7 +362,7 @@ bool LDAPClient::openConnection() if (rc == LDAP_INVALID_CREDENTIALS) return false; - diag(rc); + handleError(rc); } // Once bound, run the user DN search query and update the default value, if asked. @@ -425,7 +441,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) } }); - diag(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); + handleError(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); for ( auto * msg = ldap_first_message(handle, msgs); @@ -452,7 +468,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) ::berval bv; - diag(ldap_get_dn_ber(handle, msg, &ber, &bv)); + handleError(ldap_get_dn_ber(handle, msg, &ber, &bv)); if (bv.bv_val && bv.bv_len > 0) result.emplace(bv.bv_val, bv.bv_len); @@ -504,7 +520,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) case LDAP_RES_SEARCH_REFERENCE: { char ** referrals = nullptr; - diag(ldap_parse_reference(handle, msg, &referrals, nullptr, 0)); + handleError(ldap_parse_reference(handle, msg, &referrals, nullptr, 0)); if (referrals) { @@ -528,7 +544,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) char * matched_msg = nullptr; char * error_msg = nullptr; - diag(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0)); + handleError(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0)); if (rc != LDAP_SUCCESS) { @@ -610,7 +626,7 @@ bool LDAPSimpleAuthClient::authenticate(const RoleSearchParamsList * role_search #else // USE_LDAP -void LDAPClient::diag(const int, String) +void LDAPClient::handleError(const int, String) { throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index f228bac5926..bf193bf6bb2 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -16,6 +16,7 @@ #include #include +class SipHash; namespace DB { @@ -38,7 +39,7 @@ public: String search_filter; String attribute = "cn"; - void combineHash(std::size_t & seed) const; + void updateHash(SipHash & hash) const; }; struct RoleSearchParams @@ -46,7 +47,7 @@ public: { String prefix; - void combineHash(std::size_t & seed) const; + void updateHash(SipHash & hash) const; }; using RoleSearchParamsList = std::vector; @@ -95,7 +96,7 @@ public: ProtocolVersion protocol_version = ProtocolVersion::V3; String host; - std::uint16_t port = 636; + UInt16 port = 636; TLSEnable enable_tls = TLSEnable::YES; TLSProtocolVersion tls_minimum_protocol_version = TLSProtocolVersion::TLS1_2; @@ -119,9 +120,9 @@ public: std::chrono::seconds operation_timeout{40}; std::chrono::seconds network_timeout{30}; std::chrono::seconds search_timeout{20}; - std::uint32_t search_limit = 100; + UInt32 search_limit = 256; /// An arbitrary number, no particular motivation for this value. - void combineCoreHash(std::size_t & seed) const; + void updateHash(SipHash & hash) const; }; explicit LDAPClient(const Params & params_); @@ -133,7 +134,7 @@ public: LDAPClient & operator= (LDAPClient &&) = delete; protected: - MAYBE_NORETURN void diag(int rc, String text = ""); + MAYBE_NORETURN void handleError(int result_code, String text = ""); MAYBE_NORETURN bool openConnection(); void closeConnection() noexcept; SearchResults search(const SearchParams & search_params); diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 28ef4ebaa7a..e3690f36cc3 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -228,6 +228,12 @@ namespace user->access.revokeGrantOption(AccessType::ALL); } + bool show_named_collections = config.getBool(user_config + ".show_named_collections", false); + if (!show_named_collections) + { + user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS); + } + String default_database = config.getString(user_config + ".default_database", ""); user->default_database = default_database; diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h index 516d33f42de..decb572b019 100644 --- a/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -13,6 +13,7 @@ struct Settings; namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int CORRUPTED_DATA; } @@ -89,6 +90,13 @@ public: { this->data(place).result.read(buf, *serialization_res, arena); this->data(place).value.read(buf, *serialization_val, arena); + if (unlikely(this->data(place).value.has() != this->data(place).result.has())) + throw Exception( + ErrorCodes::CORRUPTED_DATA, + "Invalid state of the aggregate function {}: has_value ({}) != has_result ({})", + getName(), + this->data(place).value.has(), + this->data(place).result.has()); } bool allocatesMemoryInArena() const override diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 46be7331195..ef812c0361e 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -29,6 +29,8 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NOT_IMPLEMENTED; + extern const int TOO_LARGE_STRING_SIZE; + extern const int LOGICAL_ERROR; } /** Aggregate functions that store one of passed values. @@ -447,6 +449,34 @@ public: }; +struct Compatibility +{ + /// Old versions used to store terminating null-character in SingleValueDataString. + /// Then -WithTerminatingZero methods were removed from IColumn interface, + /// because these methods are quite dangerous and easy to misuse. It introduced incompatibility. + /// See https://github.com/ClickHouse/ClickHouse/pull/41431 and https://github.com/ClickHouse/ClickHouse/issues/42916 + /// Here we keep these functions for compatibility. + /// It's safe because there's no way unsanitized user input (without \0 at the end) can reach these functions. + + static StringRef getDataAtWithTerminatingZero(const ColumnString & column, size_t n) + { + auto res = column.getDataAt(n); + /// ColumnString always reserves extra byte for null-character after string. + /// But getDataAt returns StringRef without the null-character. Let's add it. + chassert(res.data[res.size] == '\0'); + ++res.size; + return res; + } + + static void insertDataWithTerminatingZero(ColumnString & column, const char * pos, size_t length) + { + /// String already has terminating null-character. + /// But insertData will add another one unconditionally. Trim existing null-character to avoid duplication. + chassert(0 < length); + chassert(pos[length - 1] == '\0'); + column.insertData(pos, length - 1); + } +}; /** For strings. Short strings are stored in the object itself, and long strings are allocated separately. * NOTE It could also be suitable for arrays of numbers. @@ -456,13 +486,15 @@ struct SingleValueDataString //-V730 private: using Self = SingleValueDataString; - Int32 size = -1; /// -1 indicates that there is no value. - Int32 capacity = 0; /// power of two or zero + /// 0 size indicates that there is no value. Empty string must has terminating '\0' and, therefore, size of empty string is 1 + UInt32 size = 0; + UInt32 capacity = 0; /// power of two or zero char * large_data; public: - static constexpr Int32 AUTOMATIC_STORAGE_SIZE = 64; - static constexpr Int32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data); + static constexpr UInt32 AUTOMATIC_STORAGE_SIZE = 64; + static constexpr UInt32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data); + static constexpr UInt32 MAX_STRING_SIZE = std::numeric_limits::max(); private: char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero. @@ -473,12 +505,22 @@ public: bool has() const { - return size >= 0; + return size; + } + +private: + char * getDataMutable() + { + return size <= MAX_SMALL_STRING_SIZE ? small_data : large_data; } const char * getData() const { - return size <= MAX_SMALL_STRING_SIZE ? small_data : large_data; + const char * data_ptr = size <= MAX_SMALL_STRING_SIZE ? small_data : large_data; + /// It must always be terminated with null-character + chassert(0 < size); + chassert(data_ptr[size - 1] == '\0'); + return data_ptr; } StringRef getStringRef() const @@ -486,61 +528,105 @@ public: return StringRef(getData(), size); } +public: void insertResultInto(IColumn & to) const { if (has()) - assert_cast(to).insertData(getData(), size); + Compatibility::insertDataWithTerminatingZero(assert_cast(to), getData(), size); else assert_cast(to).insertDefault(); } void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const { - writeBinary(size, buf); + if (unlikely(MAX_STRING_SIZE < size)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "String size is too big ({}), it's a bug", size); + + /// For serialization we use signed Int32 (for historical reasons), -1 means "no value" + Int32 size_to_write = size ? size : -1; + writeBinary(size_to_write, buf); if (has()) buf.write(getData(), size); } + void allocateLargeDataIfNeeded(UInt32 size_to_reserve, Arena * arena) + { + if (capacity < size_to_reserve) + { + if (unlikely(MAX_STRING_SIZE < size_to_reserve)) + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", size_to_reserve); + + size_t rounded_capacity = roundUpToPowerOfTwoOrZero(size_to_reserve); + chassert(rounded_capacity <= MAX_STRING_SIZE + 1); /// rounded_capacity <= 2^31 + capacity = static_cast(rounded_capacity); + + /// Don't free large_data here. + large_data = arena->alloc(capacity); + } + } + void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena) { - Int32 rhs_size; - readBinary(rhs_size, buf); + /// For serialization we use signed Int32 (for historical reasons), -1 means "no value" + Int32 rhs_size_signed; + readBinary(rhs_size_signed, buf); - if (rhs_size >= 0) + if (rhs_size_signed < 0) { - if (rhs_size <= MAX_SMALL_STRING_SIZE) - { - /// Don't free large_data here. - - size = rhs_size; - - if (size > 0) - buf.readStrict(small_data, size); - } - else - { - if (capacity < rhs_size) - { - capacity = static_cast(roundUpToPowerOfTwoOrZero(rhs_size)); - /// Don't free large_data here. - large_data = arena->alloc(capacity); - } - - size = rhs_size; - buf.readStrict(large_data, size); - } + /// Don't free large_data here. + size = 0; + return; } - else + + UInt32 rhs_size = rhs_size_signed; + if (rhs_size <= MAX_SMALL_STRING_SIZE) { /// Don't free large_data here. size = rhs_size; + buf.readStrict(small_data, size); } + else + { + /// Reserve one byte more for null-character + allocateLargeDataIfNeeded(rhs_size + 1, arena); + size = rhs_size; + buf.readStrict(large_data, size); + } + + /// Check if the string we read is null-terminated (getDataMutable does not have the assertion) + if (0 < size && getDataMutable()[size - 1] == '\0') + return; + + /// It's not null-terminated, but it must be (for historical reasons). There are two variants: + /// - The value was serialized by one of the incompatible versions of ClickHouse. We had some range of versions + /// that used to serialize SingleValueDataString without terminating '\0'. Let's just append it. + /// - An attacker sent crafted data. Sanitize it and append '\0'. + /// In all other cases the string must be already null-terminated. + + /// NOTE We cannot add '\0' unconditionally, because it will be duplicated. + /// NOTE It's possible that a string that actually ends with '\0' was written by one of the incompatible versions. + /// Unfortunately, we cannot distinguish it from normal string written by normal version. + /// So such strings will be trimmed. + + if (size == MAX_SMALL_STRING_SIZE) + { + /// Special case: We have to move value to large_data + allocateLargeDataIfNeeded(size + 1, arena); + memcpy(large_data, small_data, size); + } + + /// We have enough space to append + ++size; + getDataMutable()[size - 1] = '\0'; } /// Assuming to.has() void changeImpl(StringRef value, Arena * arena) { - Int32 value_size = static_cast(value.size); + if (unlikely(MAX_STRING_SIZE < value.size)) + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", value.size); + + UInt32 value_size = static_cast(value.size); if (value_size <= MAX_SMALL_STRING_SIZE) { @@ -552,13 +638,7 @@ public: } else { - if (capacity < value_size) - { - /// Don't free large_data here. - capacity = static_cast(roundUpToPowerOfTwoOrZero(value_size)); - large_data = arena->alloc(capacity); - } - + allocateLargeDataIfNeeded(value_size, arena); size = value_size; memcpy(large_data, value.data, size); } @@ -566,7 +646,7 @@ public: void change(const IColumn & column, size_t row_num, Arena * arena) { - changeImpl(assert_cast(column).getDataAt(row_num), arena); + changeImpl(Compatibility::getDataAtWithTerminatingZero(assert_cast(column), row_num), arena); } void change(const Self & to, Arena * arena) @@ -615,7 +695,7 @@ public: bool changeIfLess(const IColumn & column, size_t row_num, Arena * arena) { - if (!has() || assert_cast(column).getDataAt(row_num) < getStringRef()) + if (!has() || Compatibility::getDataAtWithTerminatingZero(assert_cast(column), row_num) < getStringRef()) { change(column, row_num, arena); return true; @@ -637,7 +717,7 @@ public: bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena) { - if (!has() || assert_cast(column).getDataAt(row_num) > getStringRef()) + if (!has() || Compatibility::getDataAtWithTerminatingZero(assert_cast(column), row_num) > getStringRef()) { change(column, row_num, arena); return true; @@ -664,7 +744,7 @@ public: bool isEqualTo(const IColumn & column, size_t row_num) const { - return has() && assert_cast(column).getDataAt(row_num) == getStringRef(); + return has() && Compatibility::getDataAtWithTerminatingZero(assert_cast(column), row_num) == getStringRef(); } static bool allocatesMemoryInArena() diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 6a8fc9e99d8..e9db1a71511 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -155,7 +155,7 @@ public: "Values for {} are expected to be Numeric, Float or Decimal, passed type {}", getName(), value_type->getName()}; - WhichDataType value_type_to_check(value_type); + WhichDataType value_type_to_check(value_type_without_nullable); /// Do not promote decimal because of implementation issues of this function design /// Currently we cannot get result column type in case of decimal we cannot get decimal scale @@ -202,7 +202,7 @@ public: auto & merged_maps = this->data(place).merged_maps; for (size_t col = 0, size = values_types.size(); col < size; ++col) { - const auto & array_column = assert_cast(*columns[col + 1]); + const auto & array_column = assert_cast(*columns[col + 1]); const IColumn & value_column = array_column.getData(); const IColumn::Offsets & offsets = array_column.getOffsets(); const size_t values_vec_offset = offsets[row_num - 1]; @@ -532,7 +532,12 @@ private: public: explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {} - bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Null &) const + { + /// Do not update current value, skip nulls + return false; + } + bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array & x) const { return compareImpl(x); } @@ -567,7 +572,13 @@ private: public: explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {} - bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + + bool operator() (Null &) const + { + /// Do not update current value, skip nulls + return false; + } + bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array & x) const { return compareImpl(x); } diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp index 0d1c831c839..1c90767131c 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -28,8 +29,9 @@ namespace /** `DataForVariadic` is a data structure that will be used for `uniq` aggregate function of multiple arguments. * It differs, for example, in that it uses a trivial hash function, since `uniq` of many arguments first hashes them out itself. */ -template -AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) +template typename DataForVariadic> +AggregateFunctionPtr +createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) { assertNoParameters(name, params); @@ -61,21 +63,22 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const else if (which.isTuple()) { if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } } /// "Variadic" method also works as a fallback generic case for single argument. if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } -template class Data, typename DataForVariadic> -AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) +template typename Data, template typename DataForVariadic, bool is_able_to_parallelize_merge> +AggregateFunctionPtr +createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) { assertNoParameters(name, params); @@ -91,35 +94,35 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const { const IDataType & argument_type = *argument_types[0]; - AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types)); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types)); WhichDataType which(argument_type); if (res) return res; else if (which.isDate()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isDate32()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isDateTime()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isStringOrFixedString()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isUUID()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isTuple()) { if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } } /// "Variadic" method also works as a fallback generic case for single argument. if (use_exact_hash_function) - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); else - return std::make_shared>(argument_types); + return std::make_shared>>(argument_types); } } @@ -132,14 +135,23 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory) {createAggregateFunctionUniq, properties}); factory.registerFunction("uniqHLL12", - {createAggregateFunctionUniq, properties}); + {createAggregateFunctionUniq, properties}); - factory.registerFunction("uniqExact", - {createAggregateFunctionUniq>, properties}); + auto assign_bool_param = [](const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings) + { + /// Using two level hash set if we wouldn't be able to merge in parallel can cause ~10% slowdown. + if (settings && settings->max_threads > 1) + return createAggregateFunctionUniq< + true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactDataForVariadic, true /* is_able_to_parallelize_merge */>(name, argument_types, params, settings); + else + return createAggregateFunctionUniq< + true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactDataForVariadic, false /* is_able_to_parallelize_merge */>(name, argument_types, params, settings); + }; + factory.registerFunction("uniqExact", {assign_bool_param, properties}); #if USE_DATASKETCHES factory.registerFunction("uniqTheta", - {createAggregateFunctionUniq, properties}); + {createAggregateFunctionUniq, properties}); #endif } diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index fe2530800cc..1a98bfc8456 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -1,7 +1,10 @@ #pragma once -#include +#include +#include #include +#include +#include #include @@ -13,17 +16,18 @@ #include +#include #include #include #include -#include -#include #include +#include -#include #include #include +#include #include +#include namespace DB @@ -37,94 +41,128 @@ struct AggregateFunctionUniqUniquesHashSetData using Set = UniquesHashSet>; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + static String getName() { return "uniq"; } }; /// For a function that takes multiple arguments. Such a function pre-hashes them in advance, so TrivialHash is used here. +template struct AggregateFunctionUniqUniquesHashSetDataForVariadic { using Set = UniquesHashSet; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; + static String getName() { return "uniq"; } }; /// uniqHLL12 -template +template struct AggregateFunctionUniqHLL12Data { using Set = HyperLogLogWithSmallSetOptimization; Set set; - static String getName() { return "uniqHLL12"; } -}; - -template <> -struct AggregateFunctionUniqHLL12Data -{ - using Set = HyperLogLogWithSmallSetOptimization; - Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } }; template <> -struct AggregateFunctionUniqHLL12Data +struct AggregateFunctionUniqHLL12Data { using Set = HyperLogLogWithSmallSetOptimization; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + static String getName() { return "uniqHLL12"; } }; +template <> +struct AggregateFunctionUniqHLL12Data +{ + using Set = HyperLogLogWithSmallSetOptimization; + Set set; + + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + + static String getName() { return "uniqHLL12"; } +}; + +template struct AggregateFunctionUniqHLL12DataForVariadic { using Set = HyperLogLogWithSmallSetOptimization; Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; + static String getName() { return "uniqHLL12"; } }; /// uniqExact -template +template struct AggregateFunctionUniqExactData { using Key = T; /// When creating, the hash table must be small. - using Set = HashSet< - Key, - HashCRC32, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory>; + using SingleLevelSet = HashSet, HashTableGrower<4>, HashTableAllocatorWithStackMemory>; + using TwoLevelSet = TwoLevelHashSet>; + using Set = UniqExactSet; Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = false; + static String getName() { return "uniqExact"; } }; /// For rows, we put the SipHash values (128 bits) into the hash table. -template <> -struct AggregateFunctionUniqExactData +template +struct AggregateFunctionUniqExactData { using Key = UInt128; /// When creating, the hash table must be small. - using Set = HashSet< - Key, - UInt128TrivialHash, - HashTableGrower<3>, - HashTableAllocatorWithStackMemory>; + using SingleLevelSet = HashSet, HashTableAllocatorWithStackMemory>; + using TwoLevelSet = TwoLevelHashSet; + using Set = UniqExactSet; Set set; + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = false; + static String getName() { return "uniqExact"; } }; +template +struct AggregateFunctionUniqExactDataForVariadic : AggregateFunctionUniqExactData +{ + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; +}; /// uniqTheta #if USE_DATASKETCHES @@ -134,14 +172,37 @@ struct AggregateFunctionUniqThetaData using Set = ThetaSketchData; Set set; + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + static String getName() { return "uniqTheta"; } }; +template +struct AggregateFunctionUniqThetaDataForVariadic : AggregateFunctionUniqThetaData +{ + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = true; + constexpr static bool is_exact = is_exact_; + constexpr static bool argument_is_tuple = argument_is_tuple_; +}; + #endif namespace detail { +template +struct IsUniqExactSet : std::false_type +{ +}; + +template +struct IsUniqExactSet> : std::true_type +{ +}; + + /** Hash function for uniq. */ template struct AggregateFunctionUniqTraits @@ -162,17 +223,31 @@ template struct AggregateFunctionUniqTraits }; -/** The structure for the delegation work to add one element to the `uniq` aggregate functions. +/** The structure for the delegation work to add elements to the `uniq` aggregate functions. * Used for partial specialization to add strings. */ template -struct OneAdder +struct Adder { - static void ALWAYS_INLINE add(Data & data, const IColumn & column, size_t row_num) + /// We have to introduce this template parameter (and a bunch of ugly code dealing with it), because we cannot + /// add runtime branches in whatever_hash_set::insert - it will immediately pop up in the perf top. + template + static void ALWAYS_INLINE add(Data & data, const IColumn ** columns, size_t num_args, size_t row_num) { - if constexpr (std::is_same_v - || std::is_same_v>) + if constexpr (Data::is_variadic) { + if constexpr (IsUniqExactSet::value) + data.set.template insert( + UniqVariadicHash::apply(num_args, columns, row_num)); + else + data.set.insert(T{UniqVariadicHash::apply(num_args, columns, row_num)}); + } + else if constexpr ( + std::is_same_v< + Data, + AggregateFunctionUniqUniquesHashSetData> || std::is_same_v>) + { + const auto & column = *columns[0]; if constexpr (!std::is_same_v) { using ValueType = typename decltype(data.set)::value_type; @@ -185,11 +260,13 @@ struct OneAdder data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); } } - else if constexpr (std::is_same_v>) + else if constexpr (std::is_same_v>) { + const auto & column = *columns[0]; if constexpr (!std::is_same_v) { - data.set.insert(assert_cast &>(column).getData()[row_num]); + data.set.template insert( + assert_cast &>(column).getData()[row_num]); } else { @@ -200,16 +277,72 @@ struct OneAdder hash.update(value.data, value.size); hash.get128(key); - data.set.insert(key); + data.set.template insert(key); } } #if USE_DATASKETCHES else if constexpr (std::is_same_v) { + const auto & column = *columns[0]; data.set.insertOriginal(column.getDataAt(row_num)); } #endif } + + static void ALWAYS_INLINE + add(Data & data, const IColumn ** columns, size_t num_args, size_t row_begin, size_t row_end, const char8_t * flags, const UInt8 * null_map) + { + bool use_single_level_hash_table = true; + if constexpr (Data::is_able_to_parallelize_merge) + use_single_level_hash_table = data.set.isSingleLevel(); + + if (use_single_level_hash_table) + addImpl(data, columns, num_args, row_begin, row_end, flags, null_map); + else + addImpl(data, columns, num_args, row_begin, row_end, flags, null_map); + + if constexpr (Data::is_able_to_parallelize_merge) + { + if (data.set.isSingleLevel() && data.set.size() > 100'000) + data.set.convertToTwoLevel(); + } + } + +private: + template + static void ALWAYS_INLINE + addImpl(Data & data, const IColumn ** columns, size_t num_args, size_t row_begin, size_t row_end, const char8_t * flags, const UInt8 * null_map) + { + if (!flags) + { + if (!null_map) + { + for (size_t row = row_begin; row < row_end; ++row) + add(data, columns, num_args, row); + } + else + { + for (size_t row = row_begin; row < row_end; ++row) + if (!null_map[row]) + add(data, columns, num_args, row); + } + } + else + { + if (!null_map) + { + for (size_t row = row_begin; row < row_end; ++row) + if (flags[row]) + add(data, columns, num_args, row); + } + else + { + for (size_t row = row_begin; row < row_end; ++row) + if (!null_map[row] && flags[row]) + add(data, columns, num_args, row); + } + } + } }; } @@ -219,9 +352,15 @@ struct OneAdder template class AggregateFunctionUniq final : public IAggregateFunctionDataHelper> { +private: + static constexpr size_t num_args = 1; + static constexpr bool is_able_to_parallelize_merge = Data::is_able_to_parallelize_merge; + public: - AggregateFunctionUniq(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) {} + explicit AggregateFunctionUniq(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) + { + } String getName() const override { return Data::getName(); } @@ -235,7 +374,18 @@ public: /// ALWAYS_INLINE is required to have better code layout for uniqHLL12 function void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - detail::OneAdder::add(this->data(place), *columns[0], row_num); + detail::Adder::add(this->data(place), columns, num_args, row_num); + } + + void ALWAYS_INLINE addBatchSinglePlace( + size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) + const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, nullptr /* null_map */); } void addManyDefaults( @@ -244,7 +394,23 @@ public: size_t /*length*/, Arena * /*arena*/) const override { - detail::OneAdder::add(this->data(place), *columns[0], 0); + detail::Adder::add(this->data(place), columns, num_args, 0); + } + + void addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** columns, + const UInt8 * null_map, + Arena *, + ssize_t if_argument_pos) const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -252,6 +418,16 @@ public: this->data(place).set.merge(this->data(rhs).set); } + bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override + { + if constexpr (is_able_to_parallelize_merge) + this->data(place).set.merge(this->data(rhs).set, &thread_pool); + else + this->data(place).set.merge(this->data(rhs).set); + } + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { this->data(place).set.write(buf); @@ -273,15 +449,20 @@ public: * You can pass multiple arguments as is; You can also pass one argument - a tuple. * But (for the possibility of efficient implementation), you can not pass several arguments, among which there are tuples. */ -template -class AggregateFunctionUniqVariadic final : public IAggregateFunctionDataHelper> +template +class AggregateFunctionUniqVariadic final : public IAggregateFunctionDataHelper> { private: + using T = typename Data::Set::value_type; + + static constexpr size_t is_able_to_parallelize_merge = Data::is_able_to_parallelize_merge; + static constexpr size_t argument_is_tuple = Data::argument_is_tuple; + size_t num_args = 0; public: - AggregateFunctionUniqVariadic(const DataTypes & arguments) - : IAggregateFunctionDataHelper>(arguments, {}) + explicit AggregateFunctionUniqVariadic(const DataTypes & arguments) + : IAggregateFunctionDataHelper>(arguments, {}) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); @@ -300,8 +481,34 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - this->data(place).set.insert(typename Data::Set::value_type( - UniqVariadicHash::apply(num_args, columns, row_num))); + detail::Adder::add(this->data(place), columns, num_args, row_num); + } + + void addBatchSinglePlace( + size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) + const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, nullptr /* null_map */); + } + + void addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** columns, + const UInt8 * null_map, + Arena *, + ssize_t if_argument_pos) const override + { + const char8_t * flags = nullptr; + if (if_argument_pos >= 0) + flags = assert_cast(*columns[if_argument_pos]).getData().data(); + + detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -309,6 +516,16 @@ public: this->data(place).set.merge(this->data(rhs).set); } + bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override + { + if constexpr (is_able_to_parallelize_merge) + this->data(place).set.merge(this->data(rhs).set, &thread_pool); + else + this->data(place).set.merge(this->data(rhs).set); + } + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { this->data(place).set.write(buf); diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h index 6e140f4b9cf..c97733571a3 100644 --- a/src/AggregateFunctions/Helpers.h +++ b/src/AggregateFunctions/Helpers.h @@ -74,6 +74,19 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ return nullptr; } +template